diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..b88b120968faa542df952b9b44ce872ad87a59d5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-3900/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-4048/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fdcf0cdcbe25920e4d02e4b18d60c02bc3f52ded --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: Qwen/Qwen2.5-VL-7B-Instruct +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +pipeline_tag: text-generation +model-index: +- name: Qwen2.5-VL-7B-sft-valid + results: [] +--- + + + +# Qwen2.5-VL-7B-sft-valid + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the agent_sft_valid dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 4 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- gradient_accumulation_steps: 4 +- total_train_batch_size: 16 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.05 +- num_epochs: 2 + +### Training results + + + +### Framework versions + +- PEFT 0.18.1 +- Transformers 5.2.0 +- Pytorch 2.5.1+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6cf16f3266f5592ee03447db73cafc0bd600786e --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "layers.25.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.13.mlp.down_proj", + "layers.20.mlp.gate_proj", + 
"layers.10.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.27.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.4.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.11.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.15.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.3.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.6.mlp.up_proj", + "o_proj", + "layers.24.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.18.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.8.mlp.gate_proj", + "layers.13.mlp.gate_proj", + "layers.11.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.7.mlp.gate_proj", + "layers.17.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.18.mlp.gate_proj", + "k_proj", + "layers.9.mlp.gate_proj", + "layers.23.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "v_proj", + "layers.6.mlp.down_proj", + "q_proj", + "layers.27.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.17.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.0.mlp.up_proj", + 
"layers.26.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.16.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.1.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.24.mlp.up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90b38b84542e94f6452ac08ffb8255ea56ac05c6 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6852ab96cd7ce0b54e64dd7759da8510a4a13074411e19fe8276f4d698378316 +size 323020440 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..094b5f407da3f532ec6bb52f7e804534ce26d83d --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 3066106945339392.0, + "train_loss": 0.3191354194832708, + "train_runtime": 52842.7617, + "train_samples_per_second": 1.225, + "train_steps_per_second": 0.077 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content 
%}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-3900/README.md b/checkpoint-3900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-3900/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made 
aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-3900/adapter_config.json b/checkpoint-3900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6cf16f3266f5592ee03447db73cafc0bd600786e --- /dev/null +++ b/checkpoint-3900/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.25.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.13.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.27.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.4.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.11.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.15.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.3.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.6.mlp.up_proj", + "o_proj", + "layers.24.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.18.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.8.mlp.gate_proj", + "layers.13.mlp.gate_proj", + "layers.11.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.7.mlp.gate_proj", + "layers.17.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.18.mlp.gate_proj", + "k_proj", + "layers.9.mlp.gate_proj", + "layers.23.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "v_proj", + "layers.6.mlp.down_proj", + "q_proj", + "layers.27.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.23.mlp.up_proj", + 
"layers.17.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.0.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.16.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.1.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.24.mlp.up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-3900/adapter_model.safetensors b/checkpoint-3900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22d2918298f610110783abe02cdbea7bbbb02729 --- /dev/null +++ b/checkpoint-3900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0fba6d276e8aa2d760538bb3b476ae6df145ec197d50cc024009bec32b1bcd6 +size 323020440 diff --git a/checkpoint-3900/chat_template.jinja b/checkpoint-3900/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-3900/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set 
video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-3900/global_step3900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-3900/global_step3900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b22d2713b7f567206ec1b81496e51f5686aa711 --- /dev/null +++ b/checkpoint-3900/global_step3900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79710d77ba2010056a70d80cf6f5301242a70c789f7366893ce26939143bf7d0 +size 1937772272 diff --git a/checkpoint-3900/global_step3900/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-3900/global_step3900/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f15f30dc8cabf833367174206336c5667d6b4ba9 --- /dev/null +++ b/checkpoint-3900/global_step3900/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68bc0fc82b529f9eb033dc12956156c00da490f5ccf0c96ed66c474819ca0bf +size 460630 diff --git a/checkpoint-3900/latest b/checkpoint-3900/latest new file mode 100644 index 0000000000000000000000000000000000000000..736a1ee16dea29a0e133819af30275292674da01 --- /dev/null +++ b/checkpoint-3900/latest @@ -0,0 +1 @@ +global_step3900 \ No newline at end of file diff --git a/checkpoint-3900/processor_config.json b/checkpoint-3900/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-3900/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-3900/rng_state.pth b/checkpoint-3900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fba80d40f89baa9286722b48c3ee82cfa89287d3 --- /dev/null +++ b/checkpoint-3900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ba0dccaee5da261e27a4ad3bc7d586c91db2e035a333feddbc9e43bef1a5d7 +size 14244 diff --git a/checkpoint-3900/scheduler.pt b/checkpoint-3900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..68cf56bfafbf6d64a05244aa1042eef84dfe6903 --- /dev/null +++ b/checkpoint-3900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ef415b27093ddae869d31af467a071c34f87f35c0d5ba33bd84cc48b769cda +size 
1000 diff --git a/checkpoint-3900/tokenizer.json b/checkpoint-3900/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-3900/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-3900/tokenizer_config.json b/checkpoint-3900/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-3900/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-3900/trainer_state.json b/checkpoint-3900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..12ac202ce4459300aae32d86de1ac54ffd9e55f8 --- /dev/null +++ b/checkpoint-3900/trainer_state.json @@ -0,0 +1,27334 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9272210552329172, + "eval_steps": 500, + "global_step": 3900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0004942542938341777, + "grad_norm": 
1.7827389996067007, + "learning_rate": 0.0, + "loss": 1.1816105842590332, + "step": 1 + }, + { + "epoch": 0.0009885085876683553, + "grad_norm": 1.891128580111598, + "learning_rate": 9.852216748768474e-08, + "loss": 1.1496102809906006, + "step": 2 + }, + { + "epoch": 0.001482762881502533, + "grad_norm": 1.8581340535316004, + "learning_rate": 1.9704433497536947e-07, + "loss": 1.1515967845916748, + "step": 3 + }, + { + "epoch": 0.0019770171753367106, + "grad_norm": 1.708604556953044, + "learning_rate": 2.955665024630542e-07, + "loss": 1.1795943975448608, + "step": 4 + }, + { + "epoch": 0.0024712714691708885, + "grad_norm": 1.8513528590958555, + "learning_rate": 3.9408866995073894e-07, + "loss": 1.2289564609527588, + "step": 5 + }, + { + "epoch": 0.002965525763005066, + "grad_norm": 1.972324289049384, + "learning_rate": 4.926108374384237e-07, + "loss": 1.179269790649414, + "step": 6 + }, + { + "epoch": 0.003459780056839244, + "grad_norm": 1.8334156798400192, + "learning_rate": 5.911330049261084e-07, + "loss": 1.199608564376831, + "step": 7 + }, + { + "epoch": 0.003954034350673421, + "grad_norm": 1.6669436389627912, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1643707752227783, + "step": 8 + }, + { + "epoch": 0.004448288644507599, + "grad_norm": 1.8750060934609654, + "learning_rate": 7.881773399014779e-07, + "loss": 1.1264240741729736, + "step": 9 + }, + { + "epoch": 0.004942542938341777, + "grad_norm": 1.9962482953672744, + "learning_rate": 8.866995073891626e-07, + "loss": 1.1717555522918701, + "step": 10 + }, + { + "epoch": 0.005436797232175955, + "grad_norm": 1.895693583554434, + "learning_rate": 9.852216748768474e-07, + "loss": 1.1856712102890015, + "step": 11 + }, + { + "epoch": 0.005931051526010132, + "grad_norm": 1.7765248738469863, + "learning_rate": 1.0837438423645322e-06, + "loss": 1.1258785724639893, + "step": 12 + }, + { + "epoch": 0.00642530581984431, + "grad_norm": 1.8326605479421993, + "learning_rate": 1.1822660098522167e-06, + "loss": 
1.1333656311035156, + "step": 13 + }, + { + "epoch": 0.006919560113678488, + "grad_norm": 1.9142537067819894, + "learning_rate": 1.2807881773399017e-06, + "loss": 1.2281363010406494, + "step": 14 + }, + { + "epoch": 0.0074138144075126654, + "grad_norm": 1.9232318367357442, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1910676956176758, + "step": 15 + }, + { + "epoch": 0.007908068701346842, + "grad_norm": 2.5599273269087885, + "learning_rate": 1.4778325123152712e-06, + "loss": 1.2124552726745605, + "step": 16 + }, + { + "epoch": 0.008402322995181021, + "grad_norm": 2.2109761155287133, + "learning_rate": 1.5763546798029558e-06, + "loss": 1.1993463039398193, + "step": 17 + }, + { + "epoch": 0.008896577289015198, + "grad_norm": 2.1999117305307077, + "learning_rate": 1.6748768472906405e-06, + "loss": 1.1245683431625366, + "step": 18 + }, + { + "epoch": 0.009390831582849375, + "grad_norm": 2.203478389299074, + "learning_rate": 1.7733990147783253e-06, + "loss": 1.1838568449020386, + "step": 19 + }, + { + "epoch": 0.009885085876683554, + "grad_norm": 2.419107047950166, + "learning_rate": 1.8719211822660098e-06, + "loss": 1.081169843673706, + "step": 20 + }, + { + "epoch": 0.010379340170517731, + "grad_norm": 2.559921706815215, + "learning_rate": 1.970443349753695e-06, + "loss": 1.1506569385528564, + "step": 21 + }, + { + "epoch": 0.01087359446435191, + "grad_norm": 2.8697838151244977, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0841327905654907, + "step": 22 + }, + { + "epoch": 0.011367848758186087, + "grad_norm": 2.8012936510978905, + "learning_rate": 2.1674876847290643e-06, + "loss": 1.1335525512695312, + "step": 23 + }, + { + "epoch": 0.011862103052020264, + "grad_norm": 2.649521736906966, + "learning_rate": 2.266009852216749e-06, + "loss": 1.035188913345337, + "step": 24 + }, + { + "epoch": 0.012356357345854442, + "grad_norm": 2.7385314170591166, + "learning_rate": 2.3645320197044334e-06, + "loss": 1.0640877485275269, + "step": 25 + }, + { + 
"epoch": 0.01285061163968862, + "grad_norm": 2.5011806151261755, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.0479273796081543, + "step": 26 + }, + { + "epoch": 0.013344865933522798, + "grad_norm": 2.236670838822209, + "learning_rate": 2.5615763546798034e-06, + "loss": 1.0522505044937134, + "step": 27 + }, + { + "epoch": 0.013839120227356975, + "grad_norm": 2.065544668093392, + "learning_rate": 2.660098522167488e-06, + "loss": 1.080836296081543, + "step": 28 + }, + { + "epoch": 0.014333374521191152, + "grad_norm": 1.7478242928012908, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9712544679641724, + "step": 29 + }, + { + "epoch": 0.014827628815025331, + "grad_norm": 1.5930614486695707, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.0469061136245728, + "step": 30 + }, + { + "epoch": 0.015321883108859508, + "grad_norm": 1.380137621152324, + "learning_rate": 2.9556650246305424e-06, + "loss": 0.9911116361618042, + "step": 31 + }, + { + "epoch": 0.015816137402693685, + "grad_norm": 1.3167918112915387, + "learning_rate": 3.054187192118227e-06, + "loss": 0.9552959203720093, + "step": 32 + }, + { + "epoch": 0.016310391696527864, + "grad_norm": 1.2266567383194062, + "learning_rate": 3.1527093596059115e-06, + "loss": 0.957429051399231, + "step": 33 + }, + { + "epoch": 0.016804645990362042, + "grad_norm": 1.305011449405004, + "learning_rate": 3.2512315270935963e-06, + "loss": 1.0180628299713135, + "step": 34 + }, + { + "epoch": 0.017298900284196218, + "grad_norm": 1.2347397961596738, + "learning_rate": 3.349753694581281e-06, + "loss": 0.9064415097236633, + "step": 35 + }, + { + "epoch": 0.017793154578030396, + "grad_norm": 1.216758814553776, + "learning_rate": 3.448275862068966e-06, + "loss": 0.9718184471130371, + "step": 36 + }, + { + "epoch": 0.018287408871864575, + "grad_norm": 1.065779121444896, + "learning_rate": 3.5467980295566506e-06, + "loss": 0.8831444978713989, + "step": 37 + }, + { + "epoch": 0.01878166316569875, + "grad_norm": 
1.0132491929086573, + "learning_rate": 3.6453201970443354e-06, + "loss": 0.9167139530181885, + "step": 38 + }, + { + "epoch": 0.01927591745953293, + "grad_norm": 1.0431186403983612, + "learning_rate": 3.7438423645320197e-06, + "loss": 0.9322037696838379, + "step": 39 + }, + { + "epoch": 0.019770171753367108, + "grad_norm": 1.0319066435292568, + "learning_rate": 3.842364532019705e-06, + "loss": 0.9189817905426025, + "step": 40 + }, + { + "epoch": 0.020264426047201287, + "grad_norm": 1.1670657884595383, + "learning_rate": 3.94088669950739e-06, + "loss": 0.8480448126792908, + "step": 41 + }, + { + "epoch": 0.020758680341035462, + "grad_norm": 0.9850175889441174, + "learning_rate": 4.039408866995074e-06, + "loss": 0.8907301425933838, + "step": 42 + }, + { + "epoch": 0.02125293463486964, + "grad_norm": 1.0028387912933743, + "learning_rate": 4.137931034482759e-06, + "loss": 0.8674390316009521, + "step": 43 + }, + { + "epoch": 0.02174718892870382, + "grad_norm": 0.9822966394815191, + "learning_rate": 4.236453201970444e-06, + "loss": 0.8674882054328918, + "step": 44 + }, + { + "epoch": 0.022241443222537995, + "grad_norm": 0.9778327665239519, + "learning_rate": 4.334975369458129e-06, + "loss": 0.8542560338973999, + "step": 45 + }, + { + "epoch": 0.022735697516372173, + "grad_norm": 0.8621828386281931, + "learning_rate": 4.4334975369458135e-06, + "loss": 0.772778332233429, + "step": 46 + }, + { + "epoch": 0.023229951810206352, + "grad_norm": 0.8638093364937629, + "learning_rate": 4.532019704433498e-06, + "loss": 0.7481152415275574, + "step": 47 + }, + { + "epoch": 0.023724206104040527, + "grad_norm": 0.8467972866728939, + "learning_rate": 4.630541871921182e-06, + "loss": 0.8373709917068481, + "step": 48 + }, + { + "epoch": 0.024218460397874706, + "grad_norm": 0.8165134857986008, + "learning_rate": 4.729064039408867e-06, + "loss": 0.8163385391235352, + "step": 49 + }, + { + "epoch": 0.024712714691708885, + "grad_norm": 0.833026336683437, + "learning_rate": 
4.8275862068965525e-06, + "loss": 0.7444975972175598, + "step": 50 + }, + { + "epoch": 0.025206968985543064, + "grad_norm": 0.858591041664589, + "learning_rate": 4.926108374384237e-06, + "loss": 0.7683243751525879, + "step": 51 + }, + { + "epoch": 0.02570122327937724, + "grad_norm": 1.0127725906591662, + "learning_rate": 5.024630541871922e-06, + "loss": 0.806761622428894, + "step": 52 + }, + { + "epoch": 0.026195477573211418, + "grad_norm": 0.8333649125881921, + "learning_rate": 5.123152709359607e-06, + "loss": 0.7312102913856506, + "step": 53 + }, + { + "epoch": 0.026689731867045596, + "grad_norm": 0.9425883709792775, + "learning_rate": 5.2216748768472915e-06, + "loss": 0.7351999282836914, + "step": 54 + }, + { + "epoch": 0.02718398616087977, + "grad_norm": 0.9039627787948463, + "learning_rate": 5.320197044334976e-06, + "loss": 0.7453763484954834, + "step": 55 + }, + { + "epoch": 0.02767824045471395, + "grad_norm": 0.9324665454088699, + "learning_rate": 5.41871921182266e-06, + "loss": 0.7063292860984802, + "step": 56 + }, + { + "epoch": 0.02817249474854813, + "grad_norm": 0.8343256198457882, + "learning_rate": 5.517241379310345e-06, + "loss": 0.7145994901657104, + "step": 57 + }, + { + "epoch": 0.028666749042382304, + "grad_norm": 0.7157092163314197, + "learning_rate": 5.61576354679803e-06, + "loss": 0.687594473361969, + "step": 58 + }, + { + "epoch": 0.029161003336216483, + "grad_norm": 0.7603582128739335, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.6643895506858826, + "step": 59 + }, + { + "epoch": 0.029655257630050662, + "grad_norm": 0.6925073694472516, + "learning_rate": 5.812807881773399e-06, + "loss": 0.6781614422798157, + "step": 60 + }, + { + "epoch": 0.030149511923884837, + "grad_norm": 0.7169709854131228, + "learning_rate": 5.911330049261085e-06, + "loss": 0.6209158301353455, + "step": 61 + }, + { + "epoch": 0.030643766217719016, + "grad_norm": 0.6749920715098945, + "learning_rate": 6.00985221674877e-06, + "loss": 0.6424679756164551, + "step": 
62 + }, + { + "epoch": 0.031138020511553195, + "grad_norm": 0.6435584468821339, + "learning_rate": 6.108374384236454e-06, + "loss": 0.6745971441268921, + "step": 63 + }, + { + "epoch": 0.03163227480538737, + "grad_norm": 0.657544191989632, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6520330905914307, + "step": 64 + }, + { + "epoch": 0.03212652909922155, + "grad_norm": 0.6351335823908374, + "learning_rate": 6.305418719211823e-06, + "loss": 0.6790571212768555, + "step": 65 + }, + { + "epoch": 0.03262078339305573, + "grad_norm": 0.6484215339353426, + "learning_rate": 6.403940886699508e-06, + "loss": 0.6491506099700928, + "step": 66 + }, + { + "epoch": 0.033115037686889906, + "grad_norm": 0.617685895397393, + "learning_rate": 6.502463054187193e-06, + "loss": 0.6347313523292542, + "step": 67 + }, + { + "epoch": 0.033609291980724085, + "grad_norm": 0.6638567270691007, + "learning_rate": 6.600985221674877e-06, + "loss": 0.6785881519317627, + "step": 68 + }, + { + "epoch": 0.034103546274558263, + "grad_norm": 0.6459369268846485, + "learning_rate": 6.699507389162562e-06, + "loss": 0.6470085978507996, + "step": 69 + }, + { + "epoch": 0.034597800568392435, + "grad_norm": 0.6364523697931875, + "learning_rate": 6.798029556650246e-06, + "loss": 0.6205961108207703, + "step": 70 + }, + { + "epoch": 0.035092054862226614, + "grad_norm": 0.6434045969551643, + "learning_rate": 6.896551724137932e-06, + "loss": 0.6621580123901367, + "step": 71 + }, + { + "epoch": 0.03558630915606079, + "grad_norm": 0.6281362500041567, + "learning_rate": 6.995073891625616e-06, + "loss": 0.6363088488578796, + "step": 72 + }, + { + "epoch": 0.03608056344989497, + "grad_norm": 0.6023389614758552, + "learning_rate": 7.093596059113301e-06, + "loss": 0.6073004007339478, + "step": 73 + }, + { + "epoch": 0.03657481774372915, + "grad_norm": 0.5962790573618366, + "learning_rate": 7.192118226600986e-06, + "loss": 0.6490880846977234, + "step": 74 + }, + { + "epoch": 0.03706907203756333, + "grad_norm": 
0.6425224117743127, + "learning_rate": 7.290640394088671e-06, + "loss": 0.6540624499320984, + "step": 75 + }, + { + "epoch": 0.0375633263313975, + "grad_norm": 0.6885040620745063, + "learning_rate": 7.3891625615763555e-06, + "loss": 0.6237976551055908, + "step": 76 + }, + { + "epoch": 0.03805758062523168, + "grad_norm": 0.6110947192931153, + "learning_rate": 7.487684729064039e-06, + "loss": 0.6121219992637634, + "step": 77 + }, + { + "epoch": 0.03855183491906586, + "grad_norm": 0.6031847840211293, + "learning_rate": 7.586206896551724e-06, + "loss": 0.5785888433456421, + "step": 78 + }, + { + "epoch": 0.03904608921290004, + "grad_norm": 0.645073431050071, + "learning_rate": 7.68472906403941e-06, + "loss": 0.6144810914993286, + "step": 79 + }, + { + "epoch": 0.039540343506734216, + "grad_norm": 0.709404375816405, + "learning_rate": 7.783251231527095e-06, + "loss": 0.6522500514984131, + "step": 80 + }, + { + "epoch": 0.040034597800568394, + "grad_norm": 0.6784602446095636, + "learning_rate": 7.88177339901478e-06, + "loss": 0.6126501560211182, + "step": 81 + }, + { + "epoch": 0.04052885209440257, + "grad_norm": 0.6834338295248128, + "learning_rate": 7.980295566502464e-06, + "loss": 0.573388934135437, + "step": 82 + }, + { + "epoch": 0.041023106388236745, + "grad_norm": 0.7128627750045655, + "learning_rate": 8.078817733990149e-06, + "loss": 0.6462322473526001, + "step": 83 + }, + { + "epoch": 0.041517360682070924, + "grad_norm": 0.6985575396830678, + "learning_rate": 8.177339901477834e-06, + "loss": 0.6542905569076538, + "step": 84 + }, + { + "epoch": 0.0420116149759051, + "grad_norm": 0.6800738258763197, + "learning_rate": 8.275862068965518e-06, + "loss": 0.6539976000785828, + "step": 85 + }, + { + "epoch": 0.04250586926973928, + "grad_norm": 0.6805451756514653, + "learning_rate": 8.374384236453203e-06, + "loss": 0.6303049325942993, + "step": 86 + }, + { + "epoch": 0.04300012356357346, + "grad_norm": 0.6262637687675628, + "learning_rate": 8.472906403940888e-06, + 
"loss": 0.5727078318595886, + "step": 87 + }, + { + "epoch": 0.04349437785740764, + "grad_norm": 0.6392194157453778, + "learning_rate": 8.571428571428571e-06, + "loss": 0.6204914450645447, + "step": 88 + }, + { + "epoch": 0.04398863215124181, + "grad_norm": 0.8144620373591464, + "learning_rate": 8.669950738916257e-06, + "loss": 0.633359432220459, + "step": 89 + }, + { + "epoch": 0.04448288644507599, + "grad_norm": 0.6564252660453104, + "learning_rate": 8.768472906403942e-06, + "loss": 0.5737719535827637, + "step": 90 + }, + { + "epoch": 0.04497714073891017, + "grad_norm": 0.704224097621618, + "learning_rate": 8.866995073891627e-06, + "loss": 0.6438707709312439, + "step": 91 + }, + { + "epoch": 0.04547139503274435, + "grad_norm": 0.7123681566966987, + "learning_rate": 8.965517241379312e-06, + "loss": 0.6284823417663574, + "step": 92 + }, + { + "epoch": 0.045965649326578525, + "grad_norm": 0.6879682376399587, + "learning_rate": 9.064039408866996e-06, + "loss": 0.6442058086395264, + "step": 93 + }, + { + "epoch": 0.046459903620412704, + "grad_norm": 0.709934515039082, + "learning_rate": 9.162561576354681e-06, + "loss": 0.5821751356124878, + "step": 94 + }, + { + "epoch": 0.04695415791424688, + "grad_norm": 1.530236961676562, + "learning_rate": 9.261083743842364e-06, + "loss": 0.546042263507843, + "step": 95 + }, + { + "epoch": 0.047448412208081055, + "grad_norm": 0.6844457378175872, + "learning_rate": 9.359605911330049e-06, + "loss": 0.5743244886398315, + "step": 96 + }, + { + "epoch": 0.04794266650191523, + "grad_norm": 0.6876016450255833, + "learning_rate": 9.458128078817734e-06, + "loss": 0.5775831341743469, + "step": 97 + }, + { + "epoch": 0.04843692079574941, + "grad_norm": 0.6367125491834975, + "learning_rate": 9.55665024630542e-06, + "loss": 0.5632016658782959, + "step": 98 + }, + { + "epoch": 0.04893117508958359, + "grad_norm": 0.635357516984843, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5817564129829407, + "step": 99 + }, + { + "epoch": 
0.04942542938341777, + "grad_norm": 0.6380730461382318, + "learning_rate": 9.75369458128079e-06, + "loss": 0.5692225098609924, + "step": 100 + }, + { + "epoch": 0.04991968367725195, + "grad_norm": 0.6016319910280624, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5239434242248535, + "step": 101 + }, + { + "epoch": 0.05041393797108613, + "grad_norm": 0.6757811368400487, + "learning_rate": 9.95073891625616e-06, + "loss": 0.543138861656189, + "step": 102 + }, + { + "epoch": 0.0509081922649203, + "grad_norm": 0.6907500926239555, + "learning_rate": 1.0049261083743844e-05, + "loss": 0.5914052128791809, + "step": 103 + }, + { + "epoch": 0.05140244655875448, + "grad_norm": 0.657964391130701, + "learning_rate": 1.0147783251231529e-05, + "loss": 0.5394442081451416, + "step": 104 + }, + { + "epoch": 0.051896700852588656, + "grad_norm": 0.6411875370567456, + "learning_rate": 1.0246305418719214e-05, + "loss": 0.6157902479171753, + "step": 105 + }, + { + "epoch": 0.052390955146422835, + "grad_norm": 0.738818036033501, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5863415598869324, + "step": 106 + }, + { + "epoch": 0.052885209440257014, + "grad_norm": 0.7066380161278255, + "learning_rate": 1.0443349753694583e-05, + "loss": 0.5783145427703857, + "step": 107 + }, + { + "epoch": 0.05337946373409119, + "grad_norm": 0.6486663261886427, + "learning_rate": 1.0541871921182268e-05, + "loss": 0.5761469006538391, + "step": 108 + }, + { + "epoch": 0.053873718027925364, + "grad_norm": 0.7011826885785277, + "learning_rate": 1.0640394088669953e-05, + "loss": 0.5931205749511719, + "step": 109 + }, + { + "epoch": 0.05436797232175954, + "grad_norm": 0.6624296231637669, + "learning_rate": 1.0738916256157637e-05, + "loss": 0.5429986119270325, + "step": 110 + }, + { + "epoch": 0.05486222661559372, + "grad_norm": 0.758180242025479, + "learning_rate": 1.083743842364532e-05, + "loss": 0.5154455304145813, + "step": 111 + }, + { + "epoch": 0.0553564809094279, + "grad_norm": 
0.6631694030017043, + "learning_rate": 1.0935960591133005e-05, + "loss": 0.5465028285980225, + "step": 112 + }, + { + "epoch": 0.05585073520326208, + "grad_norm": 0.7234030186547562, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5973349213600159, + "step": 113 + }, + { + "epoch": 0.05634498949709626, + "grad_norm": 0.8062494007312124, + "learning_rate": 1.1133004926108375e-05, + "loss": 0.6201578378677368, + "step": 114 + }, + { + "epoch": 0.05683924379093044, + "grad_norm": 0.7754913697435033, + "learning_rate": 1.123152709359606e-05, + "loss": 0.5090143084526062, + "step": 115 + }, + { + "epoch": 0.05733349808476461, + "grad_norm": 0.7128751966577052, + "learning_rate": 1.1330049261083744e-05, + "loss": 0.5275869369506836, + "step": 116 + }, + { + "epoch": 0.05782775237859879, + "grad_norm": 0.6950533949454222, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.571302056312561, + "step": 117 + }, + { + "epoch": 0.058322006672432966, + "grad_norm": 0.727683614551879, + "learning_rate": 1.1527093596059114e-05, + "loss": 0.5920293927192688, + "step": 118 + }, + { + "epoch": 0.058816260966267145, + "grad_norm": 0.7151674344713859, + "learning_rate": 1.1625615763546799e-05, + "loss": 0.5877068042755127, + "step": 119 + }, + { + "epoch": 0.059310515260101324, + "grad_norm": 0.7467125629300125, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.6140042543411255, + "step": 120 + }, + { + "epoch": 0.0598047695539355, + "grad_norm": 0.7531213899377466, + "learning_rate": 1.182266009852217e-05, + "loss": 0.5642052292823792, + "step": 121 + }, + { + "epoch": 0.060299023847769674, + "grad_norm": 0.7258097143889621, + "learning_rate": 1.1921182266009855e-05, + "loss": 0.5535261034965515, + "step": 122 + }, + { + "epoch": 0.06079327814160385, + "grad_norm": 0.6906824437380253, + "learning_rate": 1.201970443349754e-05, + "loss": 0.5202849507331848, + "step": 123 + }, + { + "epoch": 0.06128753243543803, + "grad_norm": 0.7290752273219125, + "learning_rate": 
1.2118226600985224e-05, + "loss": 0.5626791715621948, + "step": 124 + }, + { + "epoch": 0.06178178672927221, + "grad_norm": 0.6770400510110369, + "learning_rate": 1.2216748768472909e-05, + "loss": 0.5416101217269897, + "step": 125 + }, + { + "epoch": 0.06227604102310639, + "grad_norm": 0.730080694043851, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.5683388710021973, + "step": 126 + }, + { + "epoch": 0.06277029531694056, + "grad_norm": 0.7617011668537459, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.564468264579773, + "step": 127 + }, + { + "epoch": 0.06326454961077474, + "grad_norm": 0.7085057216007719, + "learning_rate": 1.2512315270935961e-05, + "loss": 0.5419844388961792, + "step": 128 + }, + { + "epoch": 0.06375880390460892, + "grad_norm": 0.7653624040034734, + "learning_rate": 1.2610837438423646e-05, + "loss": 0.51283860206604, + "step": 129 + }, + { + "epoch": 0.0642530581984431, + "grad_norm": 0.8138449595397697, + "learning_rate": 1.2709359605911331e-05, + "loss": 0.5807296633720398, + "step": 130 + }, + { + "epoch": 0.06474731249227728, + "grad_norm": 0.6723079879875923, + "learning_rate": 1.2807881773399016e-05, + "loss": 0.5277815461158752, + "step": 131 + }, + { + "epoch": 0.06524156678611145, + "grad_norm": 0.6681532618442926, + "learning_rate": 1.29064039408867e-05, + "loss": 0.5044680833816528, + "step": 132 + }, + { + "epoch": 0.06573582107994563, + "grad_norm": 0.753382083900827, + "learning_rate": 1.3004926108374385e-05, + "loss": 0.5412886738777161, + "step": 133 + }, + { + "epoch": 0.06623007537377981, + "grad_norm": 0.7168767227212489, + "learning_rate": 1.310344827586207e-05, + "loss": 0.5314532518386841, + "step": 134 + }, + { + "epoch": 0.06672432966761399, + "grad_norm": 0.8393067756176276, + "learning_rate": 1.3201970443349755e-05, + "loss": 0.5544138550758362, + "step": 135 + }, + { + "epoch": 0.06721858396144817, + "grad_norm": 0.7720251101355328, + "learning_rate": 1.330049261083744e-05, + "loss": 0.5745705366134644, 
+ "step": 136 + }, + { + "epoch": 0.06771283825528235, + "grad_norm": 0.8433611027798503, + "learning_rate": 1.3399014778325124e-05, + "loss": 0.5361800789833069, + "step": 137 + }, + { + "epoch": 0.06820709254911653, + "grad_norm": 0.7945865329579561, + "learning_rate": 1.3497536945812807e-05, + "loss": 0.5878221392631531, + "step": 138 + }, + { + "epoch": 0.06870134684295069, + "grad_norm": 0.7847520309491554, + "learning_rate": 1.3596059113300492e-05, + "loss": 0.5952787399291992, + "step": 139 + }, + { + "epoch": 0.06919560113678487, + "grad_norm": 0.7556944357281568, + "learning_rate": 1.369458128078818e-05, + "loss": 0.5334340929985046, + "step": 140 + }, + { + "epoch": 0.06968985543061905, + "grad_norm": 0.7730405260844581, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.5297533273696899, + "step": 141 + }, + { + "epoch": 0.07018410972445323, + "grad_norm": 0.7838373123609123, + "learning_rate": 1.3891625615763548e-05, + "loss": 0.5388105511665344, + "step": 142 + }, + { + "epoch": 0.0706783640182874, + "grad_norm": 0.6827867428906486, + "learning_rate": 1.3990147783251233e-05, + "loss": 0.484375536441803, + "step": 143 + }, + { + "epoch": 0.07117261831212159, + "grad_norm": 0.7377838543831393, + "learning_rate": 1.4088669950738918e-05, + "loss": 0.5395358800888062, + "step": 144 + }, + { + "epoch": 0.07166687260595576, + "grad_norm": 0.7024037339686016, + "learning_rate": 1.4187192118226602e-05, + "loss": 0.501459538936615, + "step": 145 + }, + { + "epoch": 0.07216112689978994, + "grad_norm": 0.7544878056630825, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.5390491485595703, + "step": 146 + }, + { + "epoch": 0.07265538119362412, + "grad_norm": 0.7358581376182646, + "learning_rate": 1.4384236453201972e-05, + "loss": 0.505649745464325, + "step": 147 + }, + { + "epoch": 0.0731496354874583, + "grad_norm": 0.791834759029257, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.5155121684074402, + "step": 148 + }, + { + "epoch": 
0.07364388978129248, + "grad_norm": 0.9182625859668322, + "learning_rate": 1.4581280788177341e-05, + "loss": 0.5502114295959473, + "step": 149 + }, + { + "epoch": 0.07413814407512666, + "grad_norm": 0.7705513444985356, + "learning_rate": 1.4679802955665026e-05, + "loss": 0.5243497490882874, + "step": 150 + }, + { + "epoch": 0.07463239836896084, + "grad_norm": 0.7936247647794451, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.529721736907959, + "step": 151 + }, + { + "epoch": 0.075126652662795, + "grad_norm": 0.7493387955752852, + "learning_rate": 1.4876847290640396e-05, + "loss": 0.4721008241176605, + "step": 152 + }, + { + "epoch": 0.07562090695662918, + "grad_norm": 0.8448372107109295, + "learning_rate": 1.4975369458128079e-05, + "loss": 0.46029576659202576, + "step": 153 + }, + { + "epoch": 0.07611516125046336, + "grad_norm": 0.8666504632745452, + "learning_rate": 1.5073891625615764e-05, + "loss": 0.5151746273040771, + "step": 154 + }, + { + "epoch": 0.07660941554429754, + "grad_norm": 0.8234378506914858, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.4743254780769348, + "step": 155 + }, + { + "epoch": 0.07710366983813172, + "grad_norm": 0.7901189046711773, + "learning_rate": 1.5270935960591133e-05, + "loss": 0.5167561769485474, + "step": 156 + }, + { + "epoch": 0.0775979241319659, + "grad_norm": 0.7442599788530032, + "learning_rate": 1.536945812807882e-05, + "loss": 0.47482365369796753, + "step": 157 + }, + { + "epoch": 0.07809217842580007, + "grad_norm": 0.7472930500337165, + "learning_rate": 1.5467980295566506e-05, + "loss": 0.5088409781455994, + "step": 158 + }, + { + "epoch": 0.07858643271963425, + "grad_norm": 0.839637174922739, + "learning_rate": 1.556650246305419e-05, + "loss": 0.5264201164245605, + "step": 159 + }, + { + "epoch": 0.07908068701346843, + "grad_norm": 0.8043048232381864, + "learning_rate": 1.5665024630541875e-05, + "loss": 0.5475984811782837, + "step": 160 + }, + { + "epoch": 0.07957494130730261, + "grad_norm": 
0.813963733997232, + "learning_rate": 1.576354679802956e-05, + "loss": 0.5652282238006592, + "step": 161 + }, + { + "epoch": 0.08006919560113679, + "grad_norm": 0.8257458665080726, + "learning_rate": 1.586206896551724e-05, + "loss": 0.5179979801177979, + "step": 162 + }, + { + "epoch": 0.08056344989497097, + "grad_norm": 0.7453513460678786, + "learning_rate": 1.5960591133004928e-05, + "loss": 0.4966253638267517, + "step": 163 + }, + { + "epoch": 0.08105770418880515, + "grad_norm": 0.7400908854625781, + "learning_rate": 1.605911330049261e-05, + "loss": 0.5216315388679504, + "step": 164 + }, + { + "epoch": 0.08155195848263931, + "grad_norm": 0.7974617542166776, + "learning_rate": 1.6157635467980298e-05, + "loss": 0.495576411485672, + "step": 165 + }, + { + "epoch": 0.08204621277647349, + "grad_norm": 0.7828217496299378, + "learning_rate": 1.625615763546798e-05, + "loss": 0.5101697444915771, + "step": 166 + }, + { + "epoch": 0.08254046707030767, + "grad_norm": 0.7891722656265441, + "learning_rate": 1.6354679802955667e-05, + "loss": 0.5438036918640137, + "step": 167 + }, + { + "epoch": 0.08303472136414185, + "grad_norm": 0.8062908900423786, + "learning_rate": 1.645320197044335e-05, + "loss": 0.5043500661849976, + "step": 168 + }, + { + "epoch": 0.08352897565797603, + "grad_norm": 0.8893145421032131, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.5129355788230896, + "step": 169 + }, + { + "epoch": 0.0840232299518102, + "grad_norm": 0.8344265538652059, + "learning_rate": 1.665024630541872e-05, + "loss": 0.48643916845321655, + "step": 170 + }, + { + "epoch": 0.08451748424564438, + "grad_norm": 0.9138503767586129, + "learning_rate": 1.6748768472906406e-05, + "loss": 0.5300272703170776, + "step": 171 + }, + { + "epoch": 0.08501173853947856, + "grad_norm": 0.9819214205489949, + "learning_rate": 1.684729064039409e-05, + "loss": 0.5321004390716553, + "step": 172 + }, + { + "epoch": 0.08550599283331274, + "grad_norm": 0.9555025734347583, + "learning_rate": 
1.6945812807881776e-05, + "loss": 0.5066401958465576, + "step": 173 + }, + { + "epoch": 0.08600024712714692, + "grad_norm": 0.8139597552129452, + "learning_rate": 1.704433497536946e-05, + "loss": 0.48993563652038574, + "step": 174 + }, + { + "epoch": 0.0864945014209811, + "grad_norm": 0.8921248257221488, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.5468013882637024, + "step": 175 + }, + { + "epoch": 0.08698875571481528, + "grad_norm": 0.8277628260630481, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5081865191459656, + "step": 176 + }, + { + "epoch": 0.08748301000864946, + "grad_norm": 0.7727605442624492, + "learning_rate": 1.7339901477832515e-05, + "loss": 0.48374873399734497, + "step": 177 + }, + { + "epoch": 0.08797726430248362, + "grad_norm": 0.7716185332367417, + "learning_rate": 1.7438423645320198e-05, + "loss": 0.4929465651512146, + "step": 178 + }, + { + "epoch": 0.0884715185963178, + "grad_norm": 0.7369259534742475, + "learning_rate": 1.7536945812807884e-05, + "loss": 0.49666428565979004, + "step": 179 + }, + { + "epoch": 0.08896577289015198, + "grad_norm": 0.9095846029993176, + "learning_rate": 1.7635467980295567e-05, + "loss": 0.5705476403236389, + "step": 180 + }, + { + "epoch": 0.08946002718398616, + "grad_norm": 0.8153458294604309, + "learning_rate": 1.7733990147783254e-05, + "loss": 0.5466605424880981, + "step": 181 + }, + { + "epoch": 0.08995428147782034, + "grad_norm": 0.7908211366510465, + "learning_rate": 1.7832512315270937e-05, + "loss": 0.47837337851524353, + "step": 182 + }, + { + "epoch": 0.09044853577165451, + "grad_norm": 0.8050205335034676, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.5370041131973267, + "step": 183 + }, + { + "epoch": 0.0909427900654887, + "grad_norm": 0.8315453873696782, + "learning_rate": 1.8029556650246306e-05, + "loss": 0.540340006351471, + "step": 184 + }, + { + "epoch": 0.09143704435932287, + "grad_norm": 0.7864886396514408, + "learning_rate": 1.8128078817733993e-05, + "loss": 
0.5165396928787231, + "step": 185 + }, + { + "epoch": 0.09193129865315705, + "grad_norm": 1.0212742677335798, + "learning_rate": 1.8226600985221676e-05, + "loss": 0.5391616821289062, + "step": 186 + }, + { + "epoch": 0.09242555294699123, + "grad_norm": 0.8362655612683817, + "learning_rate": 1.8325123152709362e-05, + "loss": 0.472774475812912, + "step": 187 + }, + { + "epoch": 0.09291980724082541, + "grad_norm": 0.7994913228950927, + "learning_rate": 1.8423645320197045e-05, + "loss": 0.5079161524772644, + "step": 188 + }, + { + "epoch": 0.09341406153465959, + "grad_norm": 0.7908069143027292, + "learning_rate": 1.852216748768473e-05, + "loss": 0.4909520149230957, + "step": 189 + }, + { + "epoch": 0.09390831582849377, + "grad_norm": 0.8204263481704893, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.5214540362358093, + "step": 190 + }, + { + "epoch": 0.09440257012232794, + "grad_norm": 1.0097310819423937, + "learning_rate": 1.8719211822660098e-05, + "loss": 0.4820341467857361, + "step": 191 + }, + { + "epoch": 0.09489682441616211, + "grad_norm": 0.7986122947719724, + "learning_rate": 1.8817733990147784e-05, + "loss": 0.5094855427742004, + "step": 192 + }, + { + "epoch": 0.09539107870999629, + "grad_norm": 0.8104059351445748, + "learning_rate": 1.8916256157635468e-05, + "loss": 0.47840312123298645, + "step": 193 + }, + { + "epoch": 0.09588533300383047, + "grad_norm": 0.8556791067143968, + "learning_rate": 1.9014778325123154e-05, + "loss": 0.5368070602416992, + "step": 194 + }, + { + "epoch": 0.09637958729766465, + "grad_norm": 0.8413108625552047, + "learning_rate": 1.911330049261084e-05, + "loss": 0.493880033493042, + "step": 195 + }, + { + "epoch": 0.09687384159149882, + "grad_norm": 0.8344269563446816, + "learning_rate": 1.9211822660098524e-05, + "loss": 0.5052261352539062, + "step": 196 + }, + { + "epoch": 0.097368095885333, + "grad_norm": 0.8488100596559239, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4817495346069336, + "step": 197 + }, + { + 
"epoch": 0.09786235017916718, + "grad_norm": 0.8835550004433761, + "learning_rate": 1.9408866995073893e-05, + "loss": 0.530259370803833, + "step": 198 + }, + { + "epoch": 0.09835660447300136, + "grad_norm": 0.8634602606490965, + "learning_rate": 1.950738916256158e-05, + "loss": 0.4984540045261383, + "step": 199 + }, + { + "epoch": 0.09885085876683554, + "grad_norm": 0.8655848178642821, + "learning_rate": 1.9605911330049263e-05, + "loss": 0.5472708940505981, + "step": 200 + }, + { + "epoch": 0.09934511306066972, + "grad_norm": 0.8520053240792014, + "learning_rate": 1.970443349753695e-05, + "loss": 0.5394926071166992, + "step": 201 + }, + { + "epoch": 0.0998393673545039, + "grad_norm": 0.9089636816290306, + "learning_rate": 1.9802955665024632e-05, + "loss": 0.5299160480499268, + "step": 202 + }, + { + "epoch": 0.10033362164833808, + "grad_norm": 0.9396000630272938, + "learning_rate": 1.990147783251232e-05, + "loss": 0.506400465965271, + "step": 203 + }, + { + "epoch": 0.10082787594217225, + "grad_norm": 0.7711226267847403, + "learning_rate": 2e-05, + "loss": 0.47956231236457825, + "step": 204 + }, + { + "epoch": 0.10132213023600642, + "grad_norm": 0.8971065288988803, + "learning_rate": 1.9999996662071442e-05, + "loss": 0.48805660009384155, + "step": 205 + }, + { + "epoch": 0.1018163845298406, + "grad_norm": 0.8419748393313904, + "learning_rate": 1.9999986648287996e-05, + "loss": 0.46014025807380676, + "step": 206 + }, + { + "epoch": 0.10231063882367478, + "grad_norm": 0.8667704651728929, + "learning_rate": 1.9999969958656345e-05, + "loss": 0.4654610753059387, + "step": 207 + }, + { + "epoch": 0.10280489311750896, + "grad_norm": 0.8143872307343123, + "learning_rate": 1.999994659318763e-05, + "loss": 0.47037336230278015, + "step": 208 + }, + { + "epoch": 0.10329914741134313, + "grad_norm": 0.8238466130965688, + "learning_rate": 1.999991655189745e-05, + "loss": 0.4853154718875885, + "step": 209 + }, + { + "epoch": 0.10379340170517731, + "grad_norm": 0.8233043672230826, 
+ "learning_rate": 1.9999879834805865e-05, + "loss": 0.4918109178543091, + "step": 210 + }, + { + "epoch": 0.10428765599901149, + "grad_norm": 0.787297319281164, + "learning_rate": 1.999983644193738e-05, + "loss": 0.5136955380439758, + "step": 211 + }, + { + "epoch": 0.10478191029284567, + "grad_norm": 0.8895124065919626, + "learning_rate": 1.9999786373320972e-05, + "loss": 0.5145115852355957, + "step": 212 + }, + { + "epoch": 0.10527616458667985, + "grad_norm": 0.8153315460424436, + "learning_rate": 1.9999729628990058e-05, + "loss": 0.4624764025211334, + "step": 213 + }, + { + "epoch": 0.10577041888051403, + "grad_norm": 0.7949012412003572, + "learning_rate": 1.9999666208982518e-05, + "loss": 0.4599718749523163, + "step": 214 + }, + { + "epoch": 0.1062646731743482, + "grad_norm": 0.8110981138692489, + "learning_rate": 1.99995961133407e-05, + "loss": 0.4642864465713501, + "step": 215 + }, + { + "epoch": 0.10675892746818239, + "grad_norm": 0.7785663242974379, + "learning_rate": 1.9999519342111392e-05, + "loss": 0.4756677448749542, + "step": 216 + }, + { + "epoch": 0.10725318176201656, + "grad_norm": 0.8781173550322721, + "learning_rate": 1.9999435895345846e-05, + "loss": 0.4982803463935852, + "step": 217 + }, + { + "epoch": 0.10774743605585073, + "grad_norm": 0.905677346569408, + "learning_rate": 1.999934577309977e-05, + "loss": 0.5189295411109924, + "step": 218 + }, + { + "epoch": 0.10824169034968491, + "grad_norm": 0.8870093356565885, + "learning_rate": 1.999924897543333e-05, + "loss": 0.5077873468399048, + "step": 219 + }, + { + "epoch": 0.10873594464351909, + "grad_norm": 0.9164316488089079, + "learning_rate": 1.9999145502411148e-05, + "loss": 0.5510451793670654, + "step": 220 + }, + { + "epoch": 0.10923019893735327, + "grad_norm": 0.850640343977404, + "learning_rate": 1.9999035354102298e-05, + "loss": 0.44604551792144775, + "step": 221 + }, + { + "epoch": 0.10972445323118744, + "grad_norm": 0.7739778425864705, + "learning_rate": 1.9998918530580315e-05, + 
"loss": 0.42567160725593567, + "step": 222 + }, + { + "epoch": 0.11021870752502162, + "grad_norm": 0.8699648367810445, + "learning_rate": 1.9998795031923186e-05, + "loss": 0.4622190594673157, + "step": 223 + }, + { + "epoch": 0.1107129618188558, + "grad_norm": 0.8261943707290175, + "learning_rate": 1.999866485821336e-05, + "loss": 0.5023611783981323, + "step": 224 + }, + { + "epoch": 0.11120721611268998, + "grad_norm": 0.769204860463621, + "learning_rate": 1.9998528009537735e-05, + "loss": 0.451701819896698, + "step": 225 + }, + { + "epoch": 0.11170147040652416, + "grad_norm": 0.9053438794448195, + "learning_rate": 1.9998384485987675e-05, + "loss": 0.48493725061416626, + "step": 226 + }, + { + "epoch": 0.11219572470035834, + "grad_norm": 0.7780216873284675, + "learning_rate": 1.9998234287658996e-05, + "loss": 0.45377853512763977, + "step": 227 + }, + { + "epoch": 0.11268997899419252, + "grad_norm": 0.9129521331875277, + "learning_rate": 1.9998077414651957e-05, + "loss": 0.48963701725006104, + "step": 228 + }, + { + "epoch": 0.1131842332880267, + "grad_norm": 0.8500208947168179, + "learning_rate": 1.9997913867071296e-05, + "loss": 0.47935402393341064, + "step": 229 + }, + { + "epoch": 0.11367848758186087, + "grad_norm": 0.8984825507205957, + "learning_rate": 1.999774364502619e-05, + "loss": 0.46203523874282837, + "step": 230 + }, + { + "epoch": 0.11417274187569504, + "grad_norm": 0.8695917880315948, + "learning_rate": 1.9997566748630274e-05, + "loss": 0.4411412179470062, + "step": 231 + }, + { + "epoch": 0.11466699616952922, + "grad_norm": 0.9063292151670944, + "learning_rate": 1.9997383178001646e-05, + "loss": 0.44424787163734436, + "step": 232 + }, + { + "epoch": 0.1151612504633634, + "grad_norm": 0.9239108187837685, + "learning_rate": 1.9997192933262853e-05, + "loss": 0.4862042963504791, + "step": 233 + }, + { + "epoch": 0.11565550475719757, + "grad_norm": 0.9583721120887143, + "learning_rate": 1.99969960145409e-05, + "loss": 0.49599340558052063, + "step": 234 + 
}, + { + "epoch": 0.11614975905103175, + "grad_norm": 0.8373453660412895, + "learning_rate": 1.999679242196725e-05, + "loss": 0.49702027440071106, + "step": 235 + }, + { + "epoch": 0.11664401334486593, + "grad_norm": 0.9122480348696357, + "learning_rate": 1.9996582155677813e-05, + "loss": 0.520037829875946, + "step": 236 + }, + { + "epoch": 0.11713826763870011, + "grad_norm": 0.8765545420336399, + "learning_rate": 1.999636521581296e-05, + "loss": 0.4571160674095154, + "step": 237 + }, + { + "epoch": 0.11763252193253429, + "grad_norm": 0.8739431997449725, + "learning_rate": 1.9996141602517526e-05, + "loss": 0.45602840185165405, + "step": 238 + }, + { + "epoch": 0.11812677622636847, + "grad_norm": 0.8737753030098584, + "learning_rate": 1.999591131594078e-05, + "loss": 0.4909728169441223, + "step": 239 + }, + { + "epoch": 0.11862103052020265, + "grad_norm": 0.9637438681008479, + "learning_rate": 1.9995674356236468e-05, + "loss": 0.47716090083122253, + "step": 240 + }, + { + "epoch": 0.11911528481403683, + "grad_norm": 0.8781513787464966, + "learning_rate": 1.9995430723562774e-05, + "loss": 0.4449527859687805, + "step": 241 + }, + { + "epoch": 0.119609539107871, + "grad_norm": 0.9278951723441426, + "learning_rate": 1.9995180418082347e-05, + "loss": 0.49069035053253174, + "step": 242 + }, + { + "epoch": 0.12010379340170518, + "grad_norm": 0.8082383806465664, + "learning_rate": 1.9994923439962286e-05, + "loss": 0.506738543510437, + "step": 243 + }, + { + "epoch": 0.12059804769553935, + "grad_norm": 0.7256243644120642, + "learning_rate": 1.9994659789374145e-05, + "loss": 0.38516658544540405, + "step": 244 + }, + { + "epoch": 0.12109230198937353, + "grad_norm": 1.120005864402108, + "learning_rate": 1.9994389466493942e-05, + "loss": 0.49539780616760254, + "step": 245 + }, + { + "epoch": 0.1215865562832077, + "grad_norm": 0.8099291045850996, + "learning_rate": 1.999411247150213e-05, + "loss": 0.4400706887245178, + "step": 246 + }, + { + "epoch": 0.12208081057704188, + 
"grad_norm": 1.033732324753182, + "learning_rate": 1.9993828804583625e-05, + "loss": 0.48815736174583435, + "step": 247 + }, + { + "epoch": 0.12257506487087606, + "grad_norm": 0.8506340248073136, + "learning_rate": 1.999353846592781e-05, + "loss": 0.42744773626327515, + "step": 248 + }, + { + "epoch": 0.12306931916471024, + "grad_norm": 0.8847437809130215, + "learning_rate": 1.9993241455728505e-05, + "loss": 0.4370969235897064, + "step": 249 + }, + { + "epoch": 0.12356357345854442, + "grad_norm": 0.8643380888364789, + "learning_rate": 1.9992937774183988e-05, + "loss": 0.4803960621356964, + "step": 250 + }, + { + "epoch": 0.1240578277523786, + "grad_norm": 0.8986867692232635, + "learning_rate": 1.9992627421496994e-05, + "loss": 0.4614640474319458, + "step": 251 + }, + { + "epoch": 0.12455208204621278, + "grad_norm": 0.819634526245566, + "learning_rate": 1.9992310397874715e-05, + "loss": 0.46626490354537964, + "step": 252 + }, + { + "epoch": 0.12504633634004694, + "grad_norm": 0.8614062439986471, + "learning_rate": 1.9991986703528784e-05, + "loss": 0.4812886416912079, + "step": 253 + }, + { + "epoch": 0.12554059063388112, + "grad_norm": 0.782352455662906, + "learning_rate": 1.99916563386753e-05, + "loss": 0.45037686824798584, + "step": 254 + }, + { + "epoch": 0.1260348449277153, + "grad_norm": 0.8735972282090627, + "learning_rate": 1.9991319303534804e-05, + "loss": 0.48492124676704407, + "step": 255 + }, + { + "epoch": 0.12652909922154948, + "grad_norm": 0.9123971905878313, + "learning_rate": 1.9990975598332304e-05, + "loss": 0.48825496435165405, + "step": 256 + }, + { + "epoch": 0.12702335351538366, + "grad_norm": 0.9350748088966393, + "learning_rate": 1.9990625223297244e-05, + "loss": 0.4836634695529938, + "step": 257 + }, + { + "epoch": 0.12751760780921784, + "grad_norm": 0.8091067369882244, + "learning_rate": 1.9990268178663538e-05, + "loss": 0.4632943272590637, + "step": 258 + }, + { + "epoch": 0.12801186210305202, + "grad_norm": 0.8933963237824735, + 
"learning_rate": 1.9989904464669533e-05, + "loss": 0.4601137042045593, + "step": 259 + }, + { + "epoch": 0.1285061163968862, + "grad_norm": 0.956219889400008, + "learning_rate": 1.998953408155805e-05, + "loss": 0.4390139579772949, + "step": 260 + }, + { + "epoch": 0.12900037069072037, + "grad_norm": 0.8209256250218969, + "learning_rate": 1.9989157029576348e-05, + "loss": 0.45749080181121826, + "step": 261 + }, + { + "epoch": 0.12949462498455455, + "grad_norm": 0.8687280720196128, + "learning_rate": 1.998877330897614e-05, + "loss": 0.4490616023540497, + "step": 262 + }, + { + "epoch": 0.12998887927838873, + "grad_norm": 0.8048623785766325, + "learning_rate": 1.998838292001359e-05, + "loss": 0.4819987714290619, + "step": 263 + }, + { + "epoch": 0.1304831335722229, + "grad_norm": 0.8512266303867803, + "learning_rate": 1.9987985862949325e-05, + "loss": 0.4448384940624237, + "step": 264 + }, + { + "epoch": 0.1309773878660571, + "grad_norm": 0.8699526878628875, + "learning_rate": 1.9987582138048405e-05, + "loss": 0.4574149549007416, + "step": 265 + }, + { + "epoch": 0.13147164215989127, + "grad_norm": 0.8239086741829158, + "learning_rate": 1.9987171745580353e-05, + "loss": 0.4765186607837677, + "step": 266 + }, + { + "epoch": 0.13196589645372545, + "grad_norm": 0.8859727328667625, + "learning_rate": 1.998675468581915e-05, + "loss": 0.4900081753730774, + "step": 267 + }, + { + "epoch": 0.13246015074755962, + "grad_norm": 0.8200731674424109, + "learning_rate": 1.9986330959043206e-05, + "loss": 0.433933287858963, + "step": 268 + }, + { + "epoch": 0.1329544050413938, + "grad_norm": 0.8424887851968712, + "learning_rate": 1.9985900565535403e-05, + "loss": 0.452491819858551, + "step": 269 + }, + { + "epoch": 0.13344865933522798, + "grad_norm": 0.8454499255279871, + "learning_rate": 1.9985463505583062e-05, + "loss": 0.4583294987678528, + "step": 270 + }, + { + "epoch": 0.13394291362906216, + "grad_norm": 0.7993545503780815, + "learning_rate": 1.9985019779477958e-05, + "loss": 
0.43183961510658264, + "step": 271 + }, + { + "epoch": 0.13443716792289634, + "grad_norm": 0.8548370246393396, + "learning_rate": 1.998456938751632e-05, + "loss": 0.48075324296951294, + "step": 272 + }, + { + "epoch": 0.13493142221673052, + "grad_norm": 0.9002412472414919, + "learning_rate": 1.9984112329998825e-05, + "loss": 0.5131007432937622, + "step": 273 + }, + { + "epoch": 0.1354256765105647, + "grad_norm": 0.9730858409317547, + "learning_rate": 1.998364860723059e-05, + "loss": 0.4841446876525879, + "step": 274 + }, + { + "epoch": 0.13591993080439888, + "grad_norm": 0.845168898875427, + "learning_rate": 1.9983178219521194e-05, + "loss": 0.5001078248023987, + "step": 275 + }, + { + "epoch": 0.13641418509823305, + "grad_norm": 0.9216453803321015, + "learning_rate": 1.998270116718466e-05, + "loss": 0.44851893186569214, + "step": 276 + }, + { + "epoch": 0.1369084393920672, + "grad_norm": 0.8496437780068066, + "learning_rate": 1.9982217450539464e-05, + "loss": 0.4635714888572693, + "step": 277 + }, + { + "epoch": 0.13740269368590138, + "grad_norm": 0.8697167139912243, + "learning_rate": 1.9981727069908525e-05, + "loss": 0.4171838164329529, + "step": 278 + }, + { + "epoch": 0.13789694797973556, + "grad_norm": 0.9173222191020198, + "learning_rate": 1.9981230025619216e-05, + "loss": 0.4819942116737366, + "step": 279 + }, + { + "epoch": 0.13839120227356974, + "grad_norm": 0.965585018194969, + "learning_rate": 1.998072631800336e-05, + "loss": 0.47878971695899963, + "step": 280 + }, + { + "epoch": 0.13888545656740392, + "grad_norm": 0.8354999533998939, + "learning_rate": 1.9980215947397217e-05, + "loss": 0.4436519145965576, + "step": 281 + }, + { + "epoch": 0.1393797108612381, + "grad_norm": 0.9615471937507843, + "learning_rate": 1.9979698914141507e-05, + "loss": 0.4633050262928009, + "step": 282 + }, + { + "epoch": 0.13987396515507228, + "grad_norm": 0.8419828093645744, + "learning_rate": 1.9979175218581397e-05, + "loss": 0.4264826774597168, + "step": 283 + }, + { + 
"epoch": 0.14036821944890646, + "grad_norm": 0.9397240311894202, + "learning_rate": 1.9978644861066493e-05, + "loss": 0.47763916850090027, + "step": 284 + }, + { + "epoch": 0.14086247374274063, + "grad_norm": 0.9621046785661004, + "learning_rate": 1.997810784195086e-05, + "loss": 0.44895434379577637, + "step": 285 + }, + { + "epoch": 0.1413567280365748, + "grad_norm": 0.9045420673708359, + "learning_rate": 1.9977564161593e-05, + "loss": 0.4287600517272949, + "step": 286 + }, + { + "epoch": 0.141850982330409, + "grad_norm": 0.9070406248365095, + "learning_rate": 1.997701382035587e-05, + "loss": 0.44175297021865845, + "step": 287 + }, + { + "epoch": 0.14234523662424317, + "grad_norm": 0.9409958894859969, + "learning_rate": 1.9976456818606868e-05, + "loss": 0.4393232464790344, + "step": 288 + }, + { + "epoch": 0.14283949091807735, + "grad_norm": 0.9574764348211552, + "learning_rate": 1.9975893156717836e-05, + "loss": 0.4600023329257965, + "step": 289 + }, + { + "epoch": 0.14333374521191153, + "grad_norm": 0.9582932704552442, + "learning_rate": 1.9975322835065075e-05, + "loss": 0.4819300174713135, + "step": 290 + }, + { + "epoch": 0.1438279995057457, + "grad_norm": 0.8798665685233671, + "learning_rate": 1.9974745854029318e-05, + "loss": 0.4391498267650604, + "step": 291 + }, + { + "epoch": 0.14432225379957989, + "grad_norm": 0.8278978827145046, + "learning_rate": 1.9974162213995748e-05, + "loss": 0.43435904383659363, + "step": 292 + }, + { + "epoch": 0.14481650809341406, + "grad_norm": 0.8555919001416697, + "learning_rate": 1.9973571915354e-05, + "loss": 0.43575727939605713, + "step": 293 + }, + { + "epoch": 0.14531076238724824, + "grad_norm": 0.847472972308698, + "learning_rate": 1.9972974958498145e-05, + "loss": 0.39998459815979004, + "step": 294 + }, + { + "epoch": 0.14580501668108242, + "grad_norm": 0.9068432330089449, + "learning_rate": 1.9972371343826705e-05, + "loss": 0.4620361030101776, + "step": 295 + }, + { + "epoch": 0.1462992709749166, + "grad_norm": 
0.9496965104492539, + "learning_rate": 1.9971761071742644e-05, + "loss": 0.5172264575958252, + "step": 296 + }, + { + "epoch": 0.14679352526875078, + "grad_norm": 0.9234160870013586, + "learning_rate": 1.997114414265337e-05, + "loss": 0.4685489535331726, + "step": 297 + }, + { + "epoch": 0.14728777956258496, + "grad_norm": 0.8830728533856737, + "learning_rate": 1.9970520556970735e-05, + "loss": 0.4346499741077423, + "step": 298 + }, + { + "epoch": 0.14778203385641914, + "grad_norm": 0.8462127222831192, + "learning_rate": 1.996989031511104e-05, + "loss": 0.4051141142845154, + "step": 299 + }, + { + "epoch": 0.14827628815025332, + "grad_norm": 1.5751283315817302, + "learning_rate": 1.996925341749502e-05, + "loss": 0.4862591028213501, + "step": 300 + }, + { + "epoch": 0.1487705424440875, + "grad_norm": 0.9475006076143342, + "learning_rate": 1.996860986454787e-05, + "loss": 0.44075754284858704, + "step": 301 + }, + { + "epoch": 0.14926479673792167, + "grad_norm": 0.8707373783945862, + "learning_rate": 1.99679596566992e-05, + "loss": 0.44321805238723755, + "step": 302 + }, + { + "epoch": 0.14975905103175585, + "grad_norm": 0.8195768056986794, + "learning_rate": 1.996730279438309e-05, + "loss": 0.4468157887458801, + "step": 303 + }, + { + "epoch": 0.15025330532559, + "grad_norm": 0.9918503423974457, + "learning_rate": 1.996663927803805e-05, + "loss": 0.48698270320892334, + "step": 304 + }, + { + "epoch": 0.15074755961942418, + "grad_norm": 0.9116215117394889, + "learning_rate": 1.9965969108107032e-05, + "loss": 0.41898253560066223, + "step": 305 + }, + { + "epoch": 0.15124181391325836, + "grad_norm": 0.9221438157249551, + "learning_rate": 1.9965292285037437e-05, + "loss": 0.4827130436897278, + "step": 306 + }, + { + "epoch": 0.15173606820709254, + "grad_norm": 0.8314057300557679, + "learning_rate": 1.99646088092811e-05, + "loss": 0.4219037592411041, + "step": 307 + }, + { + "epoch": 0.15223032250092672, + "grad_norm": 0.8392045773293594, + "learning_rate": 
1.9963918681294298e-05, + "loss": 0.4431123733520508, + "step": 308 + }, + { + "epoch": 0.1527245767947609, + "grad_norm": 0.8500815118931239, + "learning_rate": 1.996322190153775e-05, + "loss": 0.4161941409111023, + "step": 309 + }, + { + "epoch": 0.15321883108859508, + "grad_norm": 0.9107651666369411, + "learning_rate": 1.9962518470476617e-05, + "loss": 0.4774768650531769, + "step": 310 + }, + { + "epoch": 0.15371308538242925, + "grad_norm": 0.8037347887475985, + "learning_rate": 1.9961808388580503e-05, + "loss": 0.4196036159992218, + "step": 311 + }, + { + "epoch": 0.15420733967626343, + "grad_norm": 1.0067362464519019, + "learning_rate": 1.996109165632344e-05, + "loss": 0.44241398572921753, + "step": 312 + }, + { + "epoch": 0.1547015939700976, + "grad_norm": 0.888150506782497, + "learning_rate": 1.996036827418392e-05, + "loss": 0.47662627696990967, + "step": 313 + }, + { + "epoch": 0.1551958482639318, + "grad_norm": 0.8458159023673953, + "learning_rate": 1.9959638242644855e-05, + "loss": 0.4241487979888916, + "step": 314 + }, + { + "epoch": 0.15569010255776597, + "grad_norm": 0.9355978957071136, + "learning_rate": 1.9958901562193605e-05, + "loss": 0.45686113834381104, + "step": 315 + }, + { + "epoch": 0.15618435685160015, + "grad_norm": 0.944155507976385, + "learning_rate": 1.9958158233321968e-05, + "loss": 0.4154825806617737, + "step": 316 + }, + { + "epoch": 0.15667861114543433, + "grad_norm": 0.9827195710672626, + "learning_rate": 1.9957408256526176e-05, + "loss": 0.4705435037612915, + "step": 317 + }, + { + "epoch": 0.1571728654392685, + "grad_norm": 0.9880074034620054, + "learning_rate": 1.9956651632306908e-05, + "loss": 0.4367898404598236, + "step": 318 + }, + { + "epoch": 0.15766711973310268, + "grad_norm": 0.9294773909083144, + "learning_rate": 1.9955888361169272e-05, + "loss": 0.4668901264667511, + "step": 319 + }, + { + "epoch": 0.15816137402693686, + "grad_norm": 0.9543525396859661, + "learning_rate": 1.995511844362282e-05, + "loss": 
0.46429356932640076, + "step": 320 + }, + { + "epoch": 0.15865562832077104, + "grad_norm": 0.9206239653453478, + "learning_rate": 1.9954341880181536e-05, + "loss": 0.4582952857017517, + "step": 321 + }, + { + "epoch": 0.15914988261460522, + "grad_norm": 0.9460762127599929, + "learning_rate": 1.9953558671363843e-05, + "loss": 0.45110762119293213, + "step": 322 + }, + { + "epoch": 0.1596441369084394, + "grad_norm": 0.9441078381056233, + "learning_rate": 1.99527688176926e-05, + "loss": 0.4049065113067627, + "step": 323 + }, + { + "epoch": 0.16013839120227358, + "grad_norm": 0.8033040053333058, + "learning_rate": 1.9951972319695105e-05, + "loss": 0.40884825587272644, + "step": 324 + }, + { + "epoch": 0.16063264549610776, + "grad_norm": 0.902465277703788, + "learning_rate": 1.9951169177903084e-05, + "loss": 0.4416786730289459, + "step": 325 + }, + { + "epoch": 0.16112689978994194, + "grad_norm": 0.8396124025463547, + "learning_rate": 1.9950359392852704e-05, + "loss": 0.4318765103816986, + "step": 326 + }, + { + "epoch": 0.16162115408377611, + "grad_norm": 0.9197188335811614, + "learning_rate": 1.9949542965084564e-05, + "loss": 0.4415965974330902, + "step": 327 + }, + { + "epoch": 0.1621154083776103, + "grad_norm": 0.9816748337776936, + "learning_rate": 1.9948719895143703e-05, + "loss": 0.4816298186779022, + "step": 328 + }, + { + "epoch": 0.16260966267144447, + "grad_norm": 0.8960734361029558, + "learning_rate": 1.9947890183579594e-05, + "loss": 0.4329088032245636, + "step": 329 + }, + { + "epoch": 0.16310391696527862, + "grad_norm": 0.9960918612087606, + "learning_rate": 1.9947053830946134e-05, + "loss": 0.43193015456199646, + "step": 330 + }, + { + "epoch": 0.1635981712591128, + "grad_norm": 0.9310501291263382, + "learning_rate": 1.994621083780166e-05, + "loss": 0.48738086223602295, + "step": 331 + }, + { + "epoch": 0.16409242555294698, + "grad_norm": 0.9523291617618251, + "learning_rate": 1.9945361204708948e-05, + "loss": 0.4707815647125244, + "step": 332 + }, + { + 
"epoch": 0.16458667984678116, + "grad_norm": 0.8438149141988297, + "learning_rate": 1.9944504932235198e-05, + "loss": 0.4190637469291687, + "step": 333 + }, + { + "epoch": 0.16508093414061534, + "grad_norm": 0.9348901251563362, + "learning_rate": 1.9943642020952042e-05, + "loss": 0.45955735445022583, + "step": 334 + }, + { + "epoch": 0.16557518843444952, + "grad_norm": 0.9334033255095994, + "learning_rate": 1.9942772471435555e-05, + "loss": 0.4675702750682831, + "step": 335 + }, + { + "epoch": 0.1660694427282837, + "grad_norm": 0.9694338385909206, + "learning_rate": 1.9941896284266224e-05, + "loss": 0.42571327090263367, + "step": 336 + }, + { + "epoch": 0.16656369702211787, + "grad_norm": 0.827954024094364, + "learning_rate": 1.994101346002899e-05, + "loss": 0.4341443181037903, + "step": 337 + }, + { + "epoch": 0.16705795131595205, + "grad_norm": 0.9227161087353433, + "learning_rate": 1.9940123999313214e-05, + "loss": 0.4473035931587219, + "step": 338 + }, + { + "epoch": 0.16755220560978623, + "grad_norm": 0.9514215023205275, + "learning_rate": 1.9939227902712676e-05, + "loss": 0.4692152142524719, + "step": 339 + }, + { + "epoch": 0.1680464599036204, + "grad_norm": 0.902462533797338, + "learning_rate": 1.9938325170825607e-05, + "loss": 0.4169067442417145, + "step": 340 + }, + { + "epoch": 0.1685407141974546, + "grad_norm": 0.8958693793994358, + "learning_rate": 1.9937415804254657e-05, + "loss": 0.451092928647995, + "step": 341 + }, + { + "epoch": 0.16903496849128877, + "grad_norm": 0.9439820250269497, + "learning_rate": 1.99364998036069e-05, + "loss": 0.39640212059020996, + "step": 342 + }, + { + "epoch": 0.16952922278512295, + "grad_norm": 0.9953253959869931, + "learning_rate": 1.9935577169493854e-05, + "loss": 0.46396374702453613, + "step": 343 + }, + { + "epoch": 0.17002347707895712, + "grad_norm": 0.940542166338043, + "learning_rate": 1.9934647902531453e-05, + "loss": 0.4343748390674591, + "step": 344 + }, + { + "epoch": 0.1705177313727913, + "grad_norm": 
0.8926095624124082, + "learning_rate": 1.9933712003340056e-05, + "loss": 0.4353589713573456, + "step": 345 + }, + { + "epoch": 0.17101198566662548, + "grad_norm": 0.981244679678695, + "learning_rate": 1.9932769472544464e-05, + "loss": 0.4423677623271942, + "step": 346 + }, + { + "epoch": 0.17150623996045966, + "grad_norm": 0.9632090771111401, + "learning_rate": 1.9931820310773894e-05, + "loss": 0.4382045865058899, + "step": 347 + }, + { + "epoch": 0.17200049425429384, + "grad_norm": 0.9042153187184925, + "learning_rate": 1.993086451866199e-05, + "loss": 0.3966183066368103, + "step": 348 + }, + { + "epoch": 0.17249474854812802, + "grad_norm": 0.9998736444681166, + "learning_rate": 1.9929902096846833e-05, + "loss": 0.48624011874198914, + "step": 349 + }, + { + "epoch": 0.1729890028419622, + "grad_norm": 0.9399569652966117, + "learning_rate": 1.9928933045970913e-05, + "loss": 0.4442569315433502, + "step": 350 + }, + { + "epoch": 0.17348325713579638, + "grad_norm": 0.9204808269523502, + "learning_rate": 1.992795736668116e-05, + "loss": 0.42499929666519165, + "step": 351 + }, + { + "epoch": 0.17397751142963055, + "grad_norm": 0.9507435140290256, + "learning_rate": 1.9926975059628923e-05, + "loss": 0.4230741858482361, + "step": 352 + }, + { + "epoch": 0.17447176572346473, + "grad_norm": 0.9092303670359448, + "learning_rate": 1.9925986125469974e-05, + "loss": 0.4273882806301117, + "step": 353 + }, + { + "epoch": 0.1749660200172989, + "grad_norm": 0.9603670891238569, + "learning_rate": 1.9924990564864513e-05, + "loss": 0.45237618684768677, + "step": 354 + }, + { + "epoch": 0.1754602743111331, + "grad_norm": 0.8737901526941092, + "learning_rate": 1.9923988378477165e-05, + "loss": 0.4115524888038635, + "step": 355 + }, + { + "epoch": 0.17595452860496724, + "grad_norm": 0.8886450314145863, + "learning_rate": 1.9922979566976968e-05, + "loss": 0.4476633071899414, + "step": 356 + }, + { + "epoch": 0.17644878289880142, + "grad_norm": 1.155944411883778, + "learning_rate": 
1.9921964131037398e-05, + "loss": 0.44930100440979004, + "step": 357 + }, + { + "epoch": 0.1769430371926356, + "grad_norm": 1.0356351975379994, + "learning_rate": 1.9920942071336338e-05, + "loss": 0.4714374244213104, + "step": 358 + }, + { + "epoch": 0.17743729148646978, + "grad_norm": 0.9469405731486913, + "learning_rate": 1.9919913388556105e-05, + "loss": 0.47696003317832947, + "step": 359 + }, + { + "epoch": 0.17793154578030396, + "grad_norm": 0.9021123492009391, + "learning_rate": 1.9918878083383434e-05, + "loss": 0.44937074184417725, + "step": 360 + }, + { + "epoch": 0.17842580007413814, + "grad_norm": 0.9771832594876818, + "learning_rate": 1.9917836156509472e-05, + "loss": 0.44937658309936523, + "step": 361 + }, + { + "epoch": 0.17892005436797231, + "grad_norm": 0.8240548100976023, + "learning_rate": 1.9916787608629805e-05, + "loss": 0.42068418860435486, + "step": 362 + }, + { + "epoch": 0.1794143086618065, + "grad_norm": 0.9112160927316303, + "learning_rate": 1.9915732440444428e-05, + "loss": 0.3791036605834961, + "step": 363 + }, + { + "epoch": 0.17990856295564067, + "grad_norm": 0.8982890263422821, + "learning_rate": 1.991467065265775e-05, + "loss": 0.401694118976593, + "step": 364 + }, + { + "epoch": 0.18040281724947485, + "grad_norm": 0.9743587318559909, + "learning_rate": 1.9913602245978602e-05, + "loss": 0.44095057249069214, + "step": 365 + }, + { + "epoch": 0.18089707154330903, + "grad_norm": 1.0125028049881057, + "learning_rate": 1.9912527221120248e-05, + "loss": 0.435880184173584, + "step": 366 + }, + { + "epoch": 0.1813913258371432, + "grad_norm": 0.9329716691545672, + "learning_rate": 1.991144557880035e-05, + "loss": 0.4147350490093231, + "step": 367 + }, + { + "epoch": 0.1818855801309774, + "grad_norm": 1.0077861725089856, + "learning_rate": 1.9910357319741006e-05, + "loss": 0.4191502630710602, + "step": 368 + }, + { + "epoch": 0.18237983442481157, + "grad_norm": 0.9334667001994715, + "learning_rate": 1.9909262444668715e-05, + "loss": 
0.41988956928253174, + "step": 369 + }, + { + "epoch": 0.18287408871864574, + "grad_norm": 1.0279430559635638, + "learning_rate": 1.99081609543144e-05, + "loss": 0.47451251745224, + "step": 370 + }, + { + "epoch": 0.18336834301247992, + "grad_norm": 0.9591522165165333, + "learning_rate": 1.9907052849413408e-05, + "loss": 0.44665899872779846, + "step": 371 + }, + { + "epoch": 0.1838625973063141, + "grad_norm": 1.0147189696208934, + "learning_rate": 1.990593813070548e-05, + "loss": 0.40575331449508667, + "step": 372 + }, + { + "epoch": 0.18435685160014828, + "grad_norm": 0.869456919545876, + "learning_rate": 1.99048167989348e-05, + "loss": 0.40580621361732483, + "step": 373 + }, + { + "epoch": 0.18485110589398246, + "grad_norm": 0.9514367145479501, + "learning_rate": 1.9903688854849948e-05, + "loss": 0.461843878030777, + "step": 374 + }, + { + "epoch": 0.18534536018781664, + "grad_norm": 0.9237949473924573, + "learning_rate": 1.990255429920392e-05, + "loss": 0.38992881774902344, + "step": 375 + }, + { + "epoch": 0.18583961448165082, + "grad_norm": 0.8831901142276523, + "learning_rate": 1.9901413132754133e-05, + "loss": 0.4288073480129242, + "step": 376 + }, + { + "epoch": 0.186333868775485, + "grad_norm": 0.9233387492673684, + "learning_rate": 1.9900265356262418e-05, + "loss": 0.4376278221607208, + "step": 377 + }, + { + "epoch": 0.18682812306931917, + "grad_norm": 1.0362403856880367, + "learning_rate": 1.9899110970495e-05, + "loss": 0.4127569794654846, + "step": 378 + }, + { + "epoch": 0.18732237736315335, + "grad_norm": 0.9507974239376735, + "learning_rate": 1.9897949976222543e-05, + "loss": 0.4221431016921997, + "step": 379 + }, + { + "epoch": 0.18781663165698753, + "grad_norm": 0.9433678538632697, + "learning_rate": 1.9896782374220108e-05, + "loss": 0.3540682792663574, + "step": 380 + }, + { + "epoch": 0.1883108859508217, + "grad_norm": 0.9261378158924178, + "learning_rate": 1.9895608165267165e-05, + "loss": 0.3746468424797058, + "step": 381 + }, + { + "epoch": 
0.1888051402446559, + "grad_norm": 0.885989840984364, + "learning_rate": 1.9894427350147602e-05, + "loss": 0.44986462593078613, + "step": 382 + }, + { + "epoch": 0.18929939453849004, + "grad_norm": 0.990953109983041, + "learning_rate": 1.9893239929649716e-05, + "loss": 0.38902726769447327, + "step": 383 + }, + { + "epoch": 0.18979364883232422, + "grad_norm": 0.9780134618767543, + "learning_rate": 1.9892045904566212e-05, + "loss": 0.43202030658721924, + "step": 384 + }, + { + "epoch": 0.1902879031261584, + "grad_norm": 0.9892650612917288, + "learning_rate": 1.9890845275694197e-05, + "loss": 0.3984760344028473, + "step": 385 + }, + { + "epoch": 0.19078215741999258, + "grad_norm": 0.9818585745680383, + "learning_rate": 1.9889638043835203e-05, + "loss": 0.41927874088287354, + "step": 386 + }, + { + "epoch": 0.19127641171382676, + "grad_norm": 0.8767703705433573, + "learning_rate": 1.9888424209795153e-05, + "loss": 0.3809741735458374, + "step": 387 + }, + { + "epoch": 0.19177066600766093, + "grad_norm": 0.9482820311569345, + "learning_rate": 1.988720377438439e-05, + "loss": 0.4237920045852661, + "step": 388 + }, + { + "epoch": 0.1922649203014951, + "grad_norm": 1.0327070863618417, + "learning_rate": 1.9885976738417662e-05, + "loss": 0.4065277576446533, + "step": 389 + }, + { + "epoch": 0.1927591745953293, + "grad_norm": 0.9237977569787911, + "learning_rate": 1.9884743102714116e-05, + "loss": 0.41154375672340393, + "step": 390 + }, + { + "epoch": 0.19325342888916347, + "grad_norm": 1.2326124039761357, + "learning_rate": 1.9883502868097304e-05, + "loss": 0.46544453501701355, + "step": 391 + }, + { + "epoch": 0.19374768318299765, + "grad_norm": 0.9587510645484782, + "learning_rate": 1.9882256035395204e-05, + "loss": 0.41279950737953186, + "step": 392 + }, + { + "epoch": 0.19424193747683183, + "grad_norm": 0.861022204519604, + "learning_rate": 1.988100260544017e-05, + "loss": 0.40083667635917664, + "step": 393 + }, + { + "epoch": 0.194736191770666, + "grad_norm": 
0.8790820180214292, + "learning_rate": 1.9879742579068976e-05, + "loss": 0.40041595697402954, + "step": 394 + }, + { + "epoch": 0.19523044606450018, + "grad_norm": 1.0258873082657662, + "learning_rate": 1.9878475957122803e-05, + "loss": 0.45317894220352173, + "step": 395 + }, + { + "epoch": 0.19572470035833436, + "grad_norm": 0.9348755525455025, + "learning_rate": 1.987720274044723e-05, + "loss": 0.4163329005241394, + "step": 396 + }, + { + "epoch": 0.19621895465216854, + "grad_norm": 0.9706842353465618, + "learning_rate": 1.9875922929892235e-05, + "loss": 0.4252028167247772, + "step": 397 + }, + { + "epoch": 0.19671320894600272, + "grad_norm": 0.9127590943033566, + "learning_rate": 1.9874636526312202e-05, + "loss": 0.40558624267578125, + "step": 398 + }, + { + "epoch": 0.1972074632398369, + "grad_norm": 0.9762994418484081, + "learning_rate": 1.9873343530565913e-05, + "loss": 0.4352114796638489, + "step": 399 + }, + { + "epoch": 0.19770171753367108, + "grad_norm": 0.9123271316620398, + "learning_rate": 1.9872043943516556e-05, + "loss": 0.4076879024505615, + "step": 400 + }, + { + "epoch": 0.19819597182750526, + "grad_norm": 0.9627661884342358, + "learning_rate": 1.987073776603172e-05, + "loss": 0.4406166672706604, + "step": 401 + }, + { + "epoch": 0.19869022612133944, + "grad_norm": 0.8833048421451372, + "learning_rate": 1.9869424998983386e-05, + "loss": 0.3974360227584839, + "step": 402 + }, + { + "epoch": 0.19918448041517361, + "grad_norm": 0.8808806866223299, + "learning_rate": 1.9868105643247934e-05, + "loss": 0.4297831058502197, + "step": 403 + }, + { + "epoch": 0.1996787347090078, + "grad_norm": 0.9793340004481055, + "learning_rate": 1.986677969970616e-05, + "loss": 0.4214811623096466, + "step": 404 + }, + { + "epoch": 0.20017298900284197, + "grad_norm": 0.8979387674277745, + "learning_rate": 1.9865447169243234e-05, + "loss": 0.37227538228034973, + "step": 405 + }, + { + "epoch": 0.20066724329667615, + "grad_norm": 0.9492862396661451, + "learning_rate": 
1.986410805274874e-05, + "loss": 0.4367320239543915, + "step": 406 + }, + { + "epoch": 0.20116149759051033, + "grad_norm": 0.9753990450504955, + "learning_rate": 1.9862762351116646e-05, + "loss": 0.4327583909034729, + "step": 407 + }, + { + "epoch": 0.2016557518843445, + "grad_norm": 0.9742332984468446, + "learning_rate": 1.9861410065245332e-05, + "loss": 0.45309939980506897, + "step": 408 + }, + { + "epoch": 0.20215000617817866, + "grad_norm": 0.9433373475369933, + "learning_rate": 1.986005119603756e-05, + "loss": 0.39196106791496277, + "step": 409 + }, + { + "epoch": 0.20264426047201284, + "grad_norm": 0.9834536288459345, + "learning_rate": 1.985868574440049e-05, + "loss": 0.4037923812866211, + "step": 410 + }, + { + "epoch": 0.20313851476584702, + "grad_norm": 0.9331733674072598, + "learning_rate": 1.9857313711245684e-05, + "loss": 0.41214677691459656, + "step": 411 + }, + { + "epoch": 0.2036327690596812, + "grad_norm": 0.9676344806099859, + "learning_rate": 1.9855935097489087e-05, + "loss": 0.4265231192111969, + "step": 412 + }, + { + "epoch": 0.20412702335351537, + "grad_norm": 0.9398051984820485, + "learning_rate": 1.9854549904051046e-05, + "loss": 0.4245712161064148, + "step": 413 + }, + { + "epoch": 0.20462127764734955, + "grad_norm": 1.0688359248893853, + "learning_rate": 1.985315813185629e-05, + "loss": 0.36296984553337097, + "step": 414 + }, + { + "epoch": 0.20511553194118373, + "grad_norm": 0.8752111789079005, + "learning_rate": 1.985175978183395e-05, + "loss": 0.3982447683811188, + "step": 415 + }, + { + "epoch": 0.2056097862350179, + "grad_norm": 0.9696106773901182, + "learning_rate": 1.9850354854917543e-05, + "loss": 0.4087941646575928, + "step": 416 + }, + { + "epoch": 0.2061040405288521, + "grad_norm": 0.9068111697273192, + "learning_rate": 1.9848943352044982e-05, + "loss": 0.4147699177265167, + "step": 417 + }, + { + "epoch": 0.20659829482268627, + "grad_norm": 0.9679150237458849, + "learning_rate": 1.9847525274158562e-05, + "loss": 
0.42588335275650024, + "step": 418 + }, + { + "epoch": 0.20709254911652045, + "grad_norm": 0.8455247598954041, + "learning_rate": 1.9846100622204975e-05, + "loss": 0.42607247829437256, + "step": 419 + }, + { + "epoch": 0.20758680341035463, + "grad_norm": 0.8383230576354441, + "learning_rate": 1.9844669397135292e-05, + "loss": 0.3600303530693054, + "step": 420 + }, + { + "epoch": 0.2080810577041888, + "grad_norm": 0.9989742736396935, + "learning_rate": 1.9843231599904988e-05, + "loss": 0.47888651490211487, + "step": 421 + }, + { + "epoch": 0.20857531199802298, + "grad_norm": 0.9050077435994102, + "learning_rate": 1.9841787231473906e-05, + "loss": 0.3789903521537781, + "step": 422 + }, + { + "epoch": 0.20906956629185716, + "grad_norm": 0.9737429395044322, + "learning_rate": 1.9840336292806292e-05, + "loss": 0.3682858943939209, + "step": 423 + }, + { + "epoch": 0.20956382058569134, + "grad_norm": 0.9565489819657318, + "learning_rate": 1.9838878784870772e-05, + "loss": 0.42071375250816345, + "step": 424 + }, + { + "epoch": 0.21005807487952552, + "grad_norm": 0.8997646005118014, + "learning_rate": 1.9837414708640353e-05, + "loss": 0.4258945882320404, + "step": 425 + }, + { + "epoch": 0.2105523291733597, + "grad_norm": 0.8773247199262179, + "learning_rate": 1.9835944065092433e-05, + "loss": 0.42377644777297974, + "step": 426 + }, + { + "epoch": 0.21104658346719388, + "grad_norm": 0.8695535067011908, + "learning_rate": 1.9834466855208795e-05, + "loss": 0.35860198736190796, + "step": 427 + }, + { + "epoch": 0.21154083776102806, + "grad_norm": 0.8547283257189083, + "learning_rate": 1.9832983079975606e-05, + "loss": 0.3498537242412567, + "step": 428 + }, + { + "epoch": 0.21203509205486223, + "grad_norm": 0.9645117506541977, + "learning_rate": 1.9831492740383405e-05, + "loss": 0.3779754042625427, + "step": 429 + }, + { + "epoch": 0.2125293463486964, + "grad_norm": 0.9052431386511324, + "learning_rate": 1.9829995837427124e-05, + "loss": 0.3574570119380951, + "step": 430 + }, + 
{ + "epoch": 0.2130236006425306, + "grad_norm": 0.9528105437455127, + "learning_rate": 1.982849237210608e-05, + "loss": 0.40678369998931885, + "step": 431 + }, + { + "epoch": 0.21351785493636477, + "grad_norm": 1.0383565017869998, + "learning_rate": 1.9826982345423955e-05, + "loss": 0.4392494261264801, + "step": 432 + }, + { + "epoch": 0.21401210923019895, + "grad_norm": 0.9595788699726988, + "learning_rate": 1.982546575838883e-05, + "loss": 0.3858703374862671, + "step": 433 + }, + { + "epoch": 0.21450636352403313, + "grad_norm": 1.022569300933342, + "learning_rate": 1.9823942612013153e-05, + "loss": 0.4427873492240906, + "step": 434 + }, + { + "epoch": 0.21500061781786728, + "grad_norm": 1.0243841009335557, + "learning_rate": 1.9822412907313756e-05, + "loss": 0.40610629320144653, + "step": 435 + }, + { + "epoch": 0.21549487211170146, + "grad_norm": 1.0647698522638835, + "learning_rate": 1.9820876645311847e-05, + "loss": 0.4181024432182312, + "step": 436 + }, + { + "epoch": 0.21598912640553564, + "grad_norm": 0.9101041422869367, + "learning_rate": 1.981933382703301e-05, + "loss": 0.39591747522354126, + "step": 437 + }, + { + "epoch": 0.21648338069936982, + "grad_norm": 1.0250837449595331, + "learning_rate": 1.9817784453507215e-05, + "loss": 0.4326947033405304, + "step": 438 + }, + { + "epoch": 0.216977634993204, + "grad_norm": 1.0886150838818542, + "learning_rate": 1.98162285257688e-05, + "loss": 0.42645522952079773, + "step": 439 + }, + { + "epoch": 0.21747188928703817, + "grad_norm": 0.978930417047399, + "learning_rate": 1.9814666044856472e-05, + "loss": 0.37372538447380066, + "step": 440 + }, + { + "epoch": 0.21796614358087235, + "grad_norm": 1.0917263900138416, + "learning_rate": 1.9813097011813328e-05, + "loss": 0.44066423177719116, + "step": 441 + }, + { + "epoch": 0.21846039787470653, + "grad_norm": 0.9730835844652884, + "learning_rate": 1.9811521427686833e-05, + "loss": 0.39892369508743286, + "step": 442 + }, + { + "epoch": 0.2189546521685407, + 
"grad_norm": 1.003964491264553, + "learning_rate": 1.980993929352882e-05, + "loss": 0.43497514724731445, + "step": 443 + }, + { + "epoch": 0.2194489064623749, + "grad_norm": 0.9716014988350979, + "learning_rate": 1.9808350610395504e-05, + "loss": 0.3810148239135742, + "step": 444 + }, + { + "epoch": 0.21994316075620907, + "grad_norm": 1.0156931642150575, + "learning_rate": 1.9806755379347465e-05, + "loss": 0.3952462673187256, + "step": 445 + }, + { + "epoch": 0.22043741505004324, + "grad_norm": 0.8774607433571091, + "learning_rate": 1.9805153601449655e-05, + "loss": 0.39168232679367065, + "step": 446 + }, + { + "epoch": 0.22093166934387742, + "grad_norm": 0.8991272209071992, + "learning_rate": 1.98035452777714e-05, + "loss": 0.38572901487350464, + "step": 447 + }, + { + "epoch": 0.2214259236377116, + "grad_norm": 0.9468757778036829, + "learning_rate": 1.980193040938639e-05, + "loss": 0.40514758229255676, + "step": 448 + }, + { + "epoch": 0.22192017793154578, + "grad_norm": 0.9858758484436677, + "learning_rate": 1.9800308997372696e-05, + "loss": 0.4289678931236267, + "step": 449 + }, + { + "epoch": 0.22241443222537996, + "grad_norm": 1.074259689420517, + "learning_rate": 1.979868104281274e-05, + "loss": 0.4082314670085907, + "step": 450 + }, + { + "epoch": 0.22290868651921414, + "grad_norm": 0.8691392363656588, + "learning_rate": 1.979704654679333e-05, + "loss": 0.3819827735424042, + "step": 451 + }, + { + "epoch": 0.22340294081304832, + "grad_norm": 0.9538480526249539, + "learning_rate": 1.979540551040563e-05, + "loss": 0.42063748836517334, + "step": 452 + }, + { + "epoch": 0.2238971951068825, + "grad_norm": 0.9510560747426838, + "learning_rate": 1.9793757934745166e-05, + "loss": 0.41634586453437805, + "step": 453 + }, + { + "epoch": 0.22439144940071667, + "grad_norm": 0.9597511417746731, + "learning_rate": 1.979210382091184e-05, + "loss": 0.4151400625705719, + "step": 454 + }, + { + "epoch": 0.22488570369455085, + "grad_norm": 0.9461794779595009, + 
"learning_rate": 1.9790443170009918e-05, + "loss": 0.40609729290008545, + "step": 455 + }, + { + "epoch": 0.22537995798838503, + "grad_norm": 0.9000627758052128, + "learning_rate": 1.9788775983148022e-05, + "loss": 0.38967129588127136, + "step": 456 + }, + { + "epoch": 0.2258742122822192, + "grad_norm": 0.9437292574418441, + "learning_rate": 1.978710226143915e-05, + "loss": 0.3833470940589905, + "step": 457 + }, + { + "epoch": 0.2263684665760534, + "grad_norm": 1.0849111028533656, + "learning_rate": 1.978542200600064e-05, + "loss": 0.42918887734413147, + "step": 458 + }, + { + "epoch": 0.22686272086988757, + "grad_norm": 0.8891911900981012, + "learning_rate": 1.978373521795422e-05, + "loss": 0.3793666660785675, + "step": 459 + }, + { + "epoch": 0.22735697516372175, + "grad_norm": 0.9329571379921634, + "learning_rate": 1.978204189842596e-05, + "loss": 0.3885256350040436, + "step": 460 + }, + { + "epoch": 0.22785122945755593, + "grad_norm": 0.9612859575938862, + "learning_rate": 1.97803420485463e-05, + "loss": 0.4003330171108246, + "step": 461 + }, + { + "epoch": 0.22834548375139008, + "grad_norm": 1.0153934251086247, + "learning_rate": 1.9778635669450026e-05, + "loss": 0.4050712585449219, + "step": 462 + }, + { + "epoch": 0.22883973804522426, + "grad_norm": 0.9955917551783842, + "learning_rate": 1.9776922762276304e-05, + "loss": 0.4003967046737671, + "step": 463 + }, + { + "epoch": 0.22933399233905843, + "grad_norm": 1.0625378898456048, + "learning_rate": 1.9775203328168643e-05, + "loss": 0.4506968855857849, + "step": 464 + }, + { + "epoch": 0.2298282466328926, + "grad_norm": 0.9586656507624374, + "learning_rate": 1.9773477368274906e-05, + "loss": 0.3947281241416931, + "step": 465 + }, + { + "epoch": 0.2303225009267268, + "grad_norm": 1.0193199601021392, + "learning_rate": 1.9771744883747326e-05, + "loss": 0.4166758954524994, + "step": 466 + }, + { + "epoch": 0.23081675522056097, + "grad_norm": 0.9824293606770813, + "learning_rate": 1.9770005875742484e-05, + "loss": 
0.40400344133377075, + "step": 467 + }, + { + "epoch": 0.23131100951439515, + "grad_norm": 0.9404029827561814, + "learning_rate": 1.9768260345421312e-05, + "loss": 0.4143296480178833, + "step": 468 + }, + { + "epoch": 0.23180526380822933, + "grad_norm": 1.0496759638208417, + "learning_rate": 1.976650829394911e-05, + "loss": 0.39128193259239197, + "step": 469 + }, + { + "epoch": 0.2322995181020635, + "grad_norm": 1.033325283396431, + "learning_rate": 1.9764749722495514e-05, + "loss": 0.4305758476257324, + "step": 470 + }, + { + "epoch": 0.23279377239589769, + "grad_norm": 0.9791981730439014, + "learning_rate": 1.9762984632234523e-05, + "loss": 0.41711747646331787, + "step": 471 + }, + { + "epoch": 0.23328802668973186, + "grad_norm": 0.9590482451910926, + "learning_rate": 1.976121302434449e-05, + "loss": 0.43328845500946045, + "step": 472 + }, + { + "epoch": 0.23378228098356604, + "grad_norm": 0.9134750069589276, + "learning_rate": 1.975943490000811e-05, + "loss": 0.38707420229911804, + "step": 473 + }, + { + "epoch": 0.23427653527740022, + "grad_norm": 0.9896782154106246, + "learning_rate": 1.9757650260412438e-05, + "loss": 0.390054851770401, + "step": 474 + }, + { + "epoch": 0.2347707895712344, + "grad_norm": 1.0430972668852745, + "learning_rate": 1.9755859106748875e-05, + "loss": 0.45697346329689026, + "step": 475 + }, + { + "epoch": 0.23526504386506858, + "grad_norm": 0.950214634248398, + "learning_rate": 1.9754061440213165e-05, + "loss": 0.4381307363510132, + "step": 476 + }, + { + "epoch": 0.23575929815890276, + "grad_norm": 0.9612066818802636, + "learning_rate": 1.9752257262005403e-05, + "loss": 0.4217841625213623, + "step": 477 + }, + { + "epoch": 0.23625355245273694, + "grad_norm": 0.8699003234814695, + "learning_rate": 1.9750446573330038e-05, + "loss": 0.35968005657196045, + "step": 478 + }, + { + "epoch": 0.23674780674657112, + "grad_norm": 0.8353290173002438, + "learning_rate": 1.9748629375395856e-05, + "loss": 0.3516439199447632, + "step": 479 + }, + { + 
"epoch": 0.2372420610404053, + "grad_norm": 0.9683111499165196, + "learning_rate": 1.9746805669415995e-05, + "loss": 0.4078671634197235, + "step": 480 + }, + { + "epoch": 0.23773631533423947, + "grad_norm": 0.967434671965903, + "learning_rate": 1.9744975456607936e-05, + "loss": 0.39654213190078735, + "step": 481 + }, + { + "epoch": 0.23823056962807365, + "grad_norm": 0.9446129798331165, + "learning_rate": 1.9743138738193498e-05, + "loss": 0.41271698474884033, + "step": 482 + }, + { + "epoch": 0.23872482392190783, + "grad_norm": 0.9563785743614732, + "learning_rate": 1.974129551539885e-05, + "loss": 0.3957251310348511, + "step": 483 + }, + { + "epoch": 0.239219078215742, + "grad_norm": 1.0318067283466978, + "learning_rate": 1.9739445789454506e-05, + "loss": 0.39857393503189087, + "step": 484 + }, + { + "epoch": 0.2397133325095762, + "grad_norm": 0.9625937520590958, + "learning_rate": 1.973758956159531e-05, + "loss": 0.4263526499271393, + "step": 485 + }, + { + "epoch": 0.24020758680341037, + "grad_norm": 0.9782583924092142, + "learning_rate": 1.9735726833060457e-05, + "loss": 0.3849489688873291, + "step": 486 + }, + { + "epoch": 0.24070184109724455, + "grad_norm": 0.9932149128826128, + "learning_rate": 1.9733857605093476e-05, + "loss": 0.431019127368927, + "step": 487 + }, + { + "epoch": 0.2411960953910787, + "grad_norm": 0.9703866882534654, + "learning_rate": 1.973198187894224e-05, + "loss": 0.3740619421005249, + "step": 488 + }, + { + "epoch": 0.24169034968491288, + "grad_norm": 0.9420951155788563, + "learning_rate": 1.9730099655858953e-05, + "loss": 0.361680269241333, + "step": 489 + }, + { + "epoch": 0.24218460397874705, + "grad_norm": 1.0045147685747362, + "learning_rate": 1.9728210937100162e-05, + "loss": 0.41683071851730347, + "step": 490 + }, + { + "epoch": 0.24267885827258123, + "grad_norm": 1.0255058564946795, + "learning_rate": 1.9726315723926746e-05, + "loss": 0.3898739516735077, + "step": 491 + }, + { + "epoch": 0.2431731125664154, + "grad_norm": 
0.992746780987763, + "learning_rate": 1.9724414017603925e-05, + "loss": 0.39339032769203186, + "step": 492 + }, + { + "epoch": 0.2436673668602496, + "grad_norm": 0.9018262406248393, + "learning_rate": 1.9722505819401255e-05, + "loss": 0.401676744222641, + "step": 493 + }, + { + "epoch": 0.24416162115408377, + "grad_norm": 0.956392375337736, + "learning_rate": 1.9720591130592613e-05, + "loss": 0.3814789056777954, + "step": 494 + }, + { + "epoch": 0.24465587544791795, + "grad_norm": 1.0339059816881517, + "learning_rate": 1.9718669952456226e-05, + "loss": 0.3980346918106079, + "step": 495 + }, + { + "epoch": 0.24515012974175213, + "grad_norm": 1.0852693818985448, + "learning_rate": 1.971674228627464e-05, + "loss": 0.4222795069217682, + "step": 496 + }, + { + "epoch": 0.2456443840355863, + "grad_norm": 0.9629746856387489, + "learning_rate": 1.971480813333474e-05, + "loss": 0.3795197904109955, + "step": 497 + }, + { + "epoch": 0.24613863832942048, + "grad_norm": 1.0428831707745134, + "learning_rate": 1.971286749492774e-05, + "loss": 0.3746161460876465, + "step": 498 + }, + { + "epoch": 0.24663289262325466, + "grad_norm": 1.0211942338953277, + "learning_rate": 1.9710920372349174e-05, + "loss": 0.3552350699901581, + "step": 499 + }, + { + "epoch": 0.24712714691708884, + "grad_norm": 0.913724645727759, + "learning_rate": 1.9708966766898925e-05, + "loss": 0.39690741896629333, + "step": 500 + }, + { + "epoch": 0.24762140121092302, + "grad_norm": 1.0179277636972188, + "learning_rate": 1.9707006679881186e-05, + "loss": 0.39530014991760254, + "step": 501 + }, + { + "epoch": 0.2481156555047572, + "grad_norm": 1.0722850381631455, + "learning_rate": 1.9705040112604483e-05, + "loss": 0.41228705644607544, + "step": 502 + }, + { + "epoch": 0.24860990979859138, + "grad_norm": 0.9774177098582278, + "learning_rate": 1.9703067066381668e-05, + "loss": 0.4330476224422455, + "step": 503 + }, + { + "epoch": 0.24910416409242556, + "grad_norm": 0.9849824106564479, + "learning_rate": 
1.970108754252992e-05, + "loss": 0.38365668058395386, + "step": 504 + }, + { + "epoch": 0.24959841838625973, + "grad_norm": 1.0789440281177851, + "learning_rate": 1.969910154237074e-05, + "loss": 0.4419581890106201, + "step": 505 + }, + { + "epoch": 0.2500926726800939, + "grad_norm": 1.0828116066497757, + "learning_rate": 1.9697109067229957e-05, + "loss": 0.38741230964660645, + "step": 506 + }, + { + "epoch": 0.2505869269739281, + "grad_norm": 0.9914523280251673, + "learning_rate": 1.969511011843771e-05, + "loss": 0.41751983761787415, + "step": 507 + }, + { + "epoch": 0.25108118126776224, + "grad_norm": 0.9718169799013945, + "learning_rate": 1.9693104697328477e-05, + "loss": 0.40355241298675537, + "step": 508 + }, + { + "epoch": 0.25157543556159645, + "grad_norm": 1.003225231520968, + "learning_rate": 1.9691092805241046e-05, + "loss": 0.3511045575141907, + "step": 509 + }, + { + "epoch": 0.2520696898554306, + "grad_norm": 1.1208960250871327, + "learning_rate": 1.9689074443518526e-05, + "loss": 0.38917112350463867, + "step": 510 + }, + { + "epoch": 0.2525639441492648, + "grad_norm": 0.9640213098912707, + "learning_rate": 1.968704961350835e-05, + "loss": 0.40256473422050476, + "step": 511 + }, + { + "epoch": 0.25305819844309896, + "grad_norm": 0.8857886708710384, + "learning_rate": 1.968501831656226e-05, + "loss": 0.32350897789001465, + "step": 512 + }, + { + "epoch": 0.25355245273693316, + "grad_norm": 1.0209548318094466, + "learning_rate": 1.9682980554036322e-05, + "loss": 0.36787012219429016, + "step": 513 + }, + { + "epoch": 0.2540467070307673, + "grad_norm": 1.063374274844625, + "learning_rate": 1.9680936327290924e-05, + "loss": 0.4035605490207672, + "step": 514 + }, + { + "epoch": 0.2545409613246015, + "grad_norm": 0.9437423188361623, + "learning_rate": 1.9678885637690755e-05, + "loss": 0.39402660727500916, + "step": 515 + }, + { + "epoch": 0.2550352156184357, + "grad_norm": 1.1793476229973228, + "learning_rate": 1.967682848660483e-05, + "loss": 
0.37553271651268005, + "step": 516 + }, + { + "epoch": 0.2555294699122699, + "grad_norm": 1.047789732428987, + "learning_rate": 1.9674764875406472e-05, + "loss": 0.40148675441741943, + "step": 517 + }, + { + "epoch": 0.25602372420610403, + "grad_norm": 1.1994265366678782, + "learning_rate": 1.967269480547332e-05, + "loss": 0.45255252718925476, + "step": 518 + }, + { + "epoch": 0.25651797849993824, + "grad_norm": 1.0116666478277523, + "learning_rate": 1.9670618278187318e-05, + "loss": 0.4183574616909027, + "step": 519 + }, + { + "epoch": 0.2570122327937724, + "grad_norm": 0.9518606397664687, + "learning_rate": 1.9668535294934733e-05, + "loss": 0.3950796127319336, + "step": 520 + }, + { + "epoch": 0.2575064870876066, + "grad_norm": 0.9729673190351172, + "learning_rate": 1.9666445857106132e-05, + "loss": 0.4062424898147583, + "step": 521 + }, + { + "epoch": 0.25800074138144075, + "grad_norm": 0.9474577180562711, + "learning_rate": 1.966434996609639e-05, + "loss": 0.4095906913280487, + "step": 522 + }, + { + "epoch": 0.25849499567527495, + "grad_norm": 1.1739974412660419, + "learning_rate": 1.96622476233047e-05, + "loss": 0.42302393913269043, + "step": 523 + }, + { + "epoch": 0.2589892499691091, + "grad_norm": 1.0746371790844444, + "learning_rate": 1.966013883013455e-05, + "loss": 0.43204039335250854, + "step": 524 + }, + { + "epoch": 0.2594835042629433, + "grad_norm": 0.9744852361980706, + "learning_rate": 1.9658023587993748e-05, + "loss": 0.39941906929016113, + "step": 525 + }, + { + "epoch": 0.25997775855677746, + "grad_norm": 0.9322675006976836, + "learning_rate": 1.9655901898294397e-05, + "loss": 0.37053728103637695, + "step": 526 + }, + { + "epoch": 0.26047201285061167, + "grad_norm": 0.9500036404091089, + "learning_rate": 1.96537737624529e-05, + "loss": 0.4126317501068115, + "step": 527 + }, + { + "epoch": 0.2609662671444458, + "grad_norm": 0.9592560956850021, + "learning_rate": 1.9651639181889975e-05, + "loss": 0.42397794127464294, + "step": 528 + }, + { + 
"epoch": 0.26146052143827997, + "grad_norm": 1.09730750123291, + "learning_rate": 1.964949815803064e-05, + "loss": 0.3606872260570526, + "step": 529 + }, + { + "epoch": 0.2619547757321142, + "grad_norm": 1.0256203362936218, + "learning_rate": 1.9647350692304206e-05, + "loss": 0.420923113822937, + "step": 530 + }, + { + "epoch": 0.2624490300259483, + "grad_norm": 1.0242401280009386, + "learning_rate": 1.9645196786144298e-05, + "loss": 0.41700440645217896, + "step": 531 + }, + { + "epoch": 0.26294328431978253, + "grad_norm": 0.9861507549209962, + "learning_rate": 1.9643036440988825e-05, + "loss": 0.3961814045906067, + "step": 532 + }, + { + "epoch": 0.2634375386136167, + "grad_norm": 0.9400998714081333, + "learning_rate": 1.9640869658280005e-05, + "loss": 0.4025250971317291, + "step": 533 + }, + { + "epoch": 0.2639317929074509, + "grad_norm": 1.0201682019086518, + "learning_rate": 1.9638696439464357e-05, + "loss": 0.38828611373901367, + "step": 534 + }, + { + "epoch": 0.26442604720128504, + "grad_norm": 0.8944214314341241, + "learning_rate": 1.963651678599268e-05, + "loss": 0.3109109401702881, + "step": 535 + }, + { + "epoch": 0.26492030149511925, + "grad_norm": 1.0758326810562073, + "learning_rate": 1.963433069932009e-05, + "loss": 0.41516438126564026, + "step": 536 + }, + { + "epoch": 0.2654145557889534, + "grad_norm": 0.972035022615468, + "learning_rate": 1.9632138180905982e-05, + "loss": 0.3765295743942261, + "step": 537 + }, + { + "epoch": 0.2659088100827876, + "grad_norm": 1.0590611315407708, + "learning_rate": 1.9629939232214052e-05, + "loss": 0.37631309032440186, + "step": 538 + }, + { + "epoch": 0.26640306437662176, + "grad_norm": 0.9543257606304313, + "learning_rate": 1.9627733854712286e-05, + "loss": 0.3640018403530121, + "step": 539 + }, + { + "epoch": 0.26689731867045596, + "grad_norm": 1.0213174253270256, + "learning_rate": 1.9625522049872962e-05, + "loss": 0.3971521854400635, + "step": 540 + }, + { + "epoch": 0.2673915729642901, + "grad_norm": 
1.0059131210770185, + "learning_rate": 1.962330381917265e-05, + "loss": 0.4218612313270569, + "step": 541 + }, + { + "epoch": 0.2678858272581243, + "grad_norm": 1.0124871124462342, + "learning_rate": 1.9621079164092203e-05, + "loss": 0.38814622163772583, + "step": 542 + }, + { + "epoch": 0.26838008155195847, + "grad_norm": 1.0310689772428585, + "learning_rate": 1.961884808611678e-05, + "loss": 0.3912709355354309, + "step": 543 + }, + { + "epoch": 0.2688743358457927, + "grad_norm": 0.9919097213748044, + "learning_rate": 1.9616610586735808e-05, + "loss": 0.4007106423377991, + "step": 544 + }, + { + "epoch": 0.26936859013962683, + "grad_norm": 0.9871985402956727, + "learning_rate": 1.9614366667443016e-05, + "loss": 0.37406057119369507, + "step": 545 + }, + { + "epoch": 0.26986284443346104, + "grad_norm": 0.970768236440829, + "learning_rate": 1.961211632973641e-05, + "loss": 0.4187811613082886, + "step": 546 + }, + { + "epoch": 0.2703570987272952, + "grad_norm": 1.049304525520643, + "learning_rate": 1.960985957511828e-05, + "loss": 0.44418057799339294, + "step": 547 + }, + { + "epoch": 0.2708513530211294, + "grad_norm": 1.0048719478421346, + "learning_rate": 1.9607596405095205e-05, + "loss": 0.41016438603401184, + "step": 548 + }, + { + "epoch": 0.27134560731496354, + "grad_norm": 1.2563417457062223, + "learning_rate": 1.9605326821178047e-05, + "loss": 0.39461439847946167, + "step": 549 + }, + { + "epoch": 0.27183986160879775, + "grad_norm": 0.9443238609304102, + "learning_rate": 1.960305082488195e-05, + "loss": 0.4159786105155945, + "step": 550 + }, + { + "epoch": 0.2723341159026319, + "grad_norm": 0.9387957037755528, + "learning_rate": 1.960076841772633e-05, + "loss": 0.3702941834926605, + "step": 551 + }, + { + "epoch": 0.2728283701964661, + "grad_norm": 1.0745575617770338, + "learning_rate": 1.9598479601234894e-05, + "loss": 0.3482900559902191, + "step": 552 + }, + { + "epoch": 0.27332262449030026, + "grad_norm": 1.1412061517783256, + "learning_rate": 
1.9596184376935618e-05, + "loss": 0.40550655126571655, + "step": 553 + }, + { + "epoch": 0.2738168787841344, + "grad_norm": 0.9446073244587436, + "learning_rate": 1.9593882746360767e-05, + "loss": 0.38604867458343506, + "step": 554 + }, + { + "epoch": 0.2743111330779686, + "grad_norm": 0.9388567147005249, + "learning_rate": 1.9591574711046876e-05, + "loss": 0.36586758494377136, + "step": 555 + }, + { + "epoch": 0.27480538737180277, + "grad_norm": 0.9730414125092071, + "learning_rate": 1.958926027253475e-05, + "loss": 0.37780559062957764, + "step": 556 + }, + { + "epoch": 0.275299641665637, + "grad_norm": 0.9401659835761762, + "learning_rate": 1.9586939432369486e-05, + "loss": 0.3837544322013855, + "step": 557 + }, + { + "epoch": 0.2757938959594711, + "grad_norm": 1.038905164013387, + "learning_rate": 1.9584612192100433e-05, + "loss": 0.39425861835479736, + "step": 558 + }, + { + "epoch": 0.27628815025330533, + "grad_norm": 1.0791545750316935, + "learning_rate": 1.958227855328123e-05, + "loss": 0.4008832275867462, + "step": 559 + }, + { + "epoch": 0.2767824045471395, + "grad_norm": 1.0509839705522974, + "learning_rate": 1.957993851746978e-05, + "loss": 0.42411595582962036, + "step": 560 + }, + { + "epoch": 0.2772766588409737, + "grad_norm": 1.1626138880546706, + "learning_rate": 1.9577592086228257e-05, + "loss": 0.4028055965900421, + "step": 561 + }, + { + "epoch": 0.27777091313480784, + "grad_norm": 0.9383996498843509, + "learning_rate": 1.9575239261123102e-05, + "loss": 0.3785157799720764, + "step": 562 + }, + { + "epoch": 0.27826516742864205, + "grad_norm": 0.9289370196839293, + "learning_rate": 1.9572880043725032e-05, + "loss": 0.3726264536380768, + "step": 563 + }, + { + "epoch": 0.2787594217224762, + "grad_norm": 0.9959287145902769, + "learning_rate": 1.957051443560902e-05, + "loss": 0.37261486053466797, + "step": 564 + }, + { + "epoch": 0.2792536760163104, + "grad_norm": 0.9394373844868922, + "learning_rate": 1.956814243835432e-05, + "loss": 
0.34781068563461304, + "step": 565 + }, + { + "epoch": 0.27974793031014455, + "grad_norm": 0.9899407389551799, + "learning_rate": 1.956576405354444e-05, + "loss": 0.3828197121620178, + "step": 566 + }, + { + "epoch": 0.28024218460397876, + "grad_norm": 0.9387592741594649, + "learning_rate": 1.9563379282767156e-05, + "loss": 0.3839726150035858, + "step": 567 + }, + { + "epoch": 0.2807364388978129, + "grad_norm": 1.053498529947078, + "learning_rate": 1.9560988127614507e-05, + "loss": 0.3658025562763214, + "step": 568 + }, + { + "epoch": 0.2812306931916471, + "grad_norm": 1.064206434015044, + "learning_rate": 1.9558590589682795e-05, + "loss": 0.400045782327652, + "step": 569 + }, + { + "epoch": 0.28172494748548127, + "grad_norm": 0.9470530474737298, + "learning_rate": 1.955618667057258e-05, + "loss": 0.36586880683898926, + "step": 570 + }, + { + "epoch": 0.2822192017793155, + "grad_norm": 1.0137760854012388, + "learning_rate": 1.9553776371888684e-05, + "loss": 0.3886389136314392, + "step": 571 + }, + { + "epoch": 0.2827134560731496, + "grad_norm": 1.0159520278130145, + "learning_rate": 1.955135969524019e-05, + "loss": 0.37858110666275024, + "step": 572 + }, + { + "epoch": 0.28320771036698383, + "grad_norm": 0.939134880585939, + "learning_rate": 1.9548936642240435e-05, + "loss": 0.3264877498149872, + "step": 573 + }, + { + "epoch": 0.283701964660818, + "grad_norm": 1.1465399296789363, + "learning_rate": 1.9546507214507017e-05, + "loss": 0.3756924569606781, + "step": 574 + }, + { + "epoch": 0.2841962189546522, + "grad_norm": 1.0922050133590595, + "learning_rate": 1.9544071413661783e-05, + "loss": 0.3773806691169739, + "step": 575 + }, + { + "epoch": 0.28469047324848634, + "grad_norm": 1.0432958526312845, + "learning_rate": 1.9541629241330842e-05, + "loss": 0.37437382340431213, + "step": 576 + }, + { + "epoch": 0.28518472754232055, + "grad_norm": 0.9730241652440514, + "learning_rate": 1.9539180699144552e-05, + "loss": 0.3835929036140442, + "step": 577 + }, + { + "epoch": 
0.2856789818361547, + "grad_norm": 1.2039096391780213, + "learning_rate": 1.9536725788737528e-05, + "loss": 0.39163681864738464, + "step": 578 + }, + { + "epoch": 0.2861732361299889, + "grad_norm": 1.1007303408462066, + "learning_rate": 1.953426451174863e-05, + "loss": 0.39241698384284973, + "step": 579 + }, + { + "epoch": 0.28666749042382306, + "grad_norm": 0.9748115984741068, + "learning_rate": 1.953179686982097e-05, + "loss": 0.32731348276138306, + "step": 580 + }, + { + "epoch": 0.2871617447176572, + "grad_norm": 0.9649406632940735, + "learning_rate": 1.9529322864601915e-05, + "loss": 0.34735041856765747, + "step": 581 + }, + { + "epoch": 0.2876559990114914, + "grad_norm": 1.0831552948058796, + "learning_rate": 1.952684249774307e-05, + "loss": 0.3795308470726013, + "step": 582 + }, + { + "epoch": 0.28815025330532557, + "grad_norm": 1.0599543241474398, + "learning_rate": 1.95243557709003e-05, + "loss": 0.3546086549758911, + "step": 583 + }, + { + "epoch": 0.28864450759915977, + "grad_norm": 0.9634030800835625, + "learning_rate": 1.9521862685733703e-05, + "loss": 0.35397839546203613, + "step": 584 + }, + { + "epoch": 0.2891387618929939, + "grad_norm": 0.972134968680729, + "learning_rate": 1.9519363243907627e-05, + "loss": 0.350521981716156, + "step": 585 + }, + { + "epoch": 0.28963301618682813, + "grad_norm": 1.0201322204570258, + "learning_rate": 1.9516857447090663e-05, + "loss": 0.380625456571579, + "step": 586 + }, + { + "epoch": 0.2901272704806623, + "grad_norm": 0.9847688200101109, + "learning_rate": 1.9514345296955647e-05, + "loss": 0.40378236770629883, + "step": 587 + }, + { + "epoch": 0.2906215247744965, + "grad_norm": 1.0122113576142937, + "learning_rate": 1.9511826795179653e-05, + "loss": 0.4050450325012207, + "step": 588 + }, + { + "epoch": 0.29111577906833064, + "grad_norm": 1.048628562831542, + "learning_rate": 1.9509301943444e-05, + "loss": 0.3772329092025757, + "step": 589 + }, + { + "epoch": 0.29161003336216484, + "grad_norm": 1.0803687765146506, 
+ "learning_rate": 1.9506770743434244e-05, + "loss": 0.4079870581626892, + "step": 590 + }, + { + "epoch": 0.292104287655999, + "grad_norm": 1.0069688403525805, + "learning_rate": 1.950423319684017e-05, + "loss": 0.4233503043651581, + "step": 591 + }, + { + "epoch": 0.2925985419498332, + "grad_norm": 1.0403594154189246, + "learning_rate": 1.9501689305355814e-05, + "loss": 0.395530104637146, + "step": 592 + }, + { + "epoch": 0.29309279624366735, + "grad_norm": 1.0468686113369423, + "learning_rate": 1.949913907067944e-05, + "loss": 0.4266175925731659, + "step": 593 + }, + { + "epoch": 0.29358705053750156, + "grad_norm": 1.0371386643985676, + "learning_rate": 1.949658249451355e-05, + "loss": 0.4428660571575165, + "step": 594 + }, + { + "epoch": 0.2940813048313357, + "grad_norm": 0.928511699803538, + "learning_rate": 1.9494019578564874e-05, + "loss": 0.36831945180892944, + "step": 595 + }, + { + "epoch": 0.2945755591251699, + "grad_norm": 1.059362576098806, + "learning_rate": 1.949145032454438e-05, + "loss": 0.392259806394577, + "step": 596 + }, + { + "epoch": 0.29506981341900407, + "grad_norm": 0.9638882642169329, + "learning_rate": 1.948887473416727e-05, + "loss": 0.43743032217025757, + "step": 597 + }, + { + "epoch": 0.2955640677128383, + "grad_norm": 0.9566828851720006, + "learning_rate": 1.9486292809152965e-05, + "loss": 0.3725258409976959, + "step": 598 + }, + { + "epoch": 0.2960583220066724, + "grad_norm": 0.9479087116485218, + "learning_rate": 1.948370455122512e-05, + "loss": 0.39507436752319336, + "step": 599 + }, + { + "epoch": 0.29655257630050663, + "grad_norm": 1.0509892705512045, + "learning_rate": 1.9481109962111623e-05, + "loss": 0.40915870666503906, + "step": 600 + }, + { + "epoch": 0.2970468305943408, + "grad_norm": 0.9528830591600533, + "learning_rate": 1.947850904354459e-05, + "loss": 0.3465006351470947, + "step": 601 + }, + { + "epoch": 0.297541084888175, + "grad_norm": 1.0989483899383072, + "learning_rate": 1.9475901797260346e-05, + "loss": 
0.4205567538738251, + "step": 602 + }, + { + "epoch": 0.29803533918200914, + "grad_norm": 0.9986003576186586, + "learning_rate": 1.9473288224999455e-05, + "loss": 0.37682560086250305, + "step": 603 + }, + { + "epoch": 0.29852959347584335, + "grad_norm": 1.0393693996744362, + "learning_rate": 1.9470668328506705e-05, + "loss": 0.3865458369255066, + "step": 604 + }, + { + "epoch": 0.2990238477696775, + "grad_norm": 0.9426218637426483, + "learning_rate": 1.9468042109531096e-05, + "loss": 0.36366063356399536, + "step": 605 + }, + { + "epoch": 0.2995181020635117, + "grad_norm": 0.9801320950707162, + "learning_rate": 1.9465409569825857e-05, + "loss": 0.3861471116542816, + "step": 606 + }, + { + "epoch": 0.30001235635734586, + "grad_norm": 1.0257103381374684, + "learning_rate": 1.9462770711148433e-05, + "loss": 0.3499199151992798, + "step": 607 + }, + { + "epoch": 0.30050661065118, + "grad_norm": 1.1030346241860873, + "learning_rate": 1.946012553526049e-05, + "loss": 0.3704417943954468, + "step": 608 + }, + { + "epoch": 0.3010008649450142, + "grad_norm": 1.0751948386377395, + "learning_rate": 1.9457474043927908e-05, + "loss": 0.41278937458992004, + "step": 609 + }, + { + "epoch": 0.30149511923884836, + "grad_norm": 1.0379271128545955, + "learning_rate": 1.9454816238920787e-05, + "loss": 0.36078256368637085, + "step": 610 + }, + { + "epoch": 0.30198937353268257, + "grad_norm": 1.05890389444684, + "learning_rate": 1.9452152122013434e-05, + "loss": 0.3713051676750183, + "step": 611 + }, + { + "epoch": 0.3024836278265167, + "grad_norm": 1.0547983951495754, + "learning_rate": 1.9449481694984382e-05, + "loss": 0.3919684886932373, + "step": 612 + }, + { + "epoch": 0.3029778821203509, + "grad_norm": 1.1211767888578545, + "learning_rate": 1.9446804959616364e-05, + "loss": 0.4249044358730316, + "step": 613 + }, + { + "epoch": 0.3034721364141851, + "grad_norm": 1.0386798112962086, + "learning_rate": 1.9444121917696335e-05, + "loss": 0.4033172130584717, + "step": 614 + }, + { + 
"epoch": 0.3039663907080193, + "grad_norm": 1.020453301484689, + "learning_rate": 1.9441432571015455e-05, + "loss": 0.35740789771080017, + "step": 615 + }, + { + "epoch": 0.30446064500185344, + "grad_norm": 1.0567402195641693, + "learning_rate": 1.9438736921369093e-05, + "loss": 0.41219189763069153, + "step": 616 + }, + { + "epoch": 0.30495489929568764, + "grad_norm": 1.0356137182677312, + "learning_rate": 1.9436034970556824e-05, + "loss": 0.3751283884048462, + "step": 617 + }, + { + "epoch": 0.3054491535895218, + "grad_norm": 1.0460808776118622, + "learning_rate": 1.9433326720382433e-05, + "loss": 0.40294593572616577, + "step": 618 + }, + { + "epoch": 0.305943407883356, + "grad_norm": 1.0087358245362568, + "learning_rate": 1.943061217265391e-05, + "loss": 0.4163772463798523, + "step": 619 + }, + { + "epoch": 0.30643766217719015, + "grad_norm": 1.007467123707354, + "learning_rate": 1.9427891329183444e-05, + "loss": 0.3796529769897461, + "step": 620 + }, + { + "epoch": 0.30693191647102436, + "grad_norm": 1.0905533067383615, + "learning_rate": 1.942516419178744e-05, + "loss": 0.44097092747688293, + "step": 621 + }, + { + "epoch": 0.3074261707648585, + "grad_norm": 0.9615172689674734, + "learning_rate": 1.942243076228649e-05, + "loss": 0.384232759475708, + "step": 622 + }, + { + "epoch": 0.3079204250586927, + "grad_norm": 0.9038435200954008, + "learning_rate": 1.941969104250539e-05, + "loss": 0.3734084367752075, + "step": 623 + }, + { + "epoch": 0.30841467935252687, + "grad_norm": 0.9414597847653995, + "learning_rate": 1.9416945034273142e-05, + "loss": 0.3532239496707916, + "step": 624 + }, + { + "epoch": 0.3089089336463611, + "grad_norm": 1.0668895366566058, + "learning_rate": 1.941419273942294e-05, + "loss": 0.39430537819862366, + "step": 625 + }, + { + "epoch": 0.3094031879401952, + "grad_norm": 1.0091341034087684, + "learning_rate": 1.941143415979218e-05, + "loss": 0.35790857672691345, + "step": 626 + }, + { + "epoch": 0.30989744223402943, + "grad_norm": 
1.0381854826035726, + "learning_rate": 1.9408669297222446e-05, + "loss": 0.3684060871601105, + "step": 627 + }, + { + "epoch": 0.3103916965278636, + "grad_norm": 0.9553898295016832, + "learning_rate": 1.9405898153559522e-05, + "loss": 0.3425355553627014, + "step": 628 + }, + { + "epoch": 0.3108859508216978, + "grad_norm": 0.9032294986887355, + "learning_rate": 1.9403120730653387e-05, + "loss": 0.3295109272003174, + "step": 629 + }, + { + "epoch": 0.31138020511553194, + "grad_norm": 1.0576168899253493, + "learning_rate": 1.940033703035821e-05, + "loss": 0.37015989422798157, + "step": 630 + }, + { + "epoch": 0.31187445940936614, + "grad_norm": 1.1361288169710941, + "learning_rate": 1.939754705453234e-05, + "loss": 0.40625980496406555, + "step": 631 + }, + { + "epoch": 0.3123687137032003, + "grad_norm": 1.3354529260238757, + "learning_rate": 1.939475080503833e-05, + "loss": 0.42503830790519714, + "step": 632 + }, + { + "epoch": 0.31286296799703445, + "grad_norm": 1.0863606838535078, + "learning_rate": 1.939194828374292e-05, + "loss": 0.36230289936065674, + "step": 633 + }, + { + "epoch": 0.31335722229086865, + "grad_norm": 0.9800314584790245, + "learning_rate": 1.938913949251703e-05, + "loss": 0.4128720164299011, + "step": 634 + }, + { + "epoch": 0.3138514765847028, + "grad_norm": 1.1018828002960295, + "learning_rate": 1.938632443323577e-05, + "loss": 0.39706575870513916, + "step": 635 + }, + { + "epoch": 0.314345730878537, + "grad_norm": 1.0451325322820368, + "learning_rate": 1.9383503107778434e-05, + "loss": 0.38395214080810547, + "step": 636 + }, + { + "epoch": 0.31483998517237116, + "grad_norm": 0.9669746428685202, + "learning_rate": 1.9380675518028495e-05, + "loss": 0.3629944324493408, + "step": 637 + }, + { + "epoch": 0.31533423946620537, + "grad_norm": 1.0589959103814197, + "learning_rate": 1.937784166587361e-05, + "loss": 0.39474761486053467, + "step": 638 + }, + { + "epoch": 0.3158284937600395, + "grad_norm": 1.085403264447479, + "learning_rate": 
1.9375001553205627e-05, + "loss": 0.423098087310791, + "step": 639 + }, + { + "epoch": 0.3163227480538737, + "grad_norm": 0.9239589256190138, + "learning_rate": 1.937215518192056e-05, + "loss": 0.3453904986381531, + "step": 640 + }, + { + "epoch": 0.3168170023477079, + "grad_norm": 0.9432054956835023, + "learning_rate": 1.9369302553918605e-05, + "loss": 0.3659127354621887, + "step": 641 + }, + { + "epoch": 0.3173112566415421, + "grad_norm": 1.060860081964917, + "learning_rate": 1.9366443671104132e-05, + "loss": 0.3613426089286804, + "step": 642 + }, + { + "epoch": 0.31780551093537623, + "grad_norm": 0.9515218135636598, + "learning_rate": 1.93635785353857e-05, + "loss": 0.3556531071662903, + "step": 643 + }, + { + "epoch": 0.31829976522921044, + "grad_norm": 0.9893630091198329, + "learning_rate": 1.9360707148676022e-05, + "loss": 0.3515596091747284, + "step": 644 + }, + { + "epoch": 0.3187940195230446, + "grad_norm": 0.9802147109168395, + "learning_rate": 1.9357829512892e-05, + "loss": 0.36270469427108765, + "step": 645 + }, + { + "epoch": 0.3192882738168788, + "grad_norm": 0.9936651325349853, + "learning_rate": 1.9354945629954706e-05, + "loss": 0.3617076277732849, + "step": 646 + }, + { + "epoch": 0.31978252811071295, + "grad_norm": 1.0835943099678094, + "learning_rate": 1.9352055501789376e-05, + "loss": 0.3888331949710846, + "step": 647 + }, + { + "epoch": 0.32027678240454716, + "grad_norm": 1.0454884563674065, + "learning_rate": 1.9349159130325413e-05, + "loss": 0.41199982166290283, + "step": 648 + }, + { + "epoch": 0.3207710366983813, + "grad_norm": 1.0758693507529822, + "learning_rate": 1.93462565174964e-05, + "loss": 0.3878370225429535, + "step": 649 + }, + { + "epoch": 0.3212652909922155, + "grad_norm": 1.0303850194409756, + "learning_rate": 1.9343347665240077e-05, + "loss": 0.380184531211853, + "step": 650 + }, + { + "epoch": 0.32175954528604966, + "grad_norm": 1.143999159363527, + "learning_rate": 1.9340432575498355e-05, + "loss": 0.3746795356273651, + 
"step": 651 + }, + { + "epoch": 0.32225379957988387, + "grad_norm": 1.0188863097829193, + "learning_rate": 1.93375112502173e-05, + "loss": 0.3700905442237854, + "step": 652 + }, + { + "epoch": 0.322748053873718, + "grad_norm": 0.9032826115280742, + "learning_rate": 1.9334583691347153e-05, + "loss": 0.3331850469112396, + "step": 653 + }, + { + "epoch": 0.32324230816755223, + "grad_norm": 0.949854268007892, + "learning_rate": 1.933164990084231e-05, + "loss": 0.3397464156150818, + "step": 654 + }, + { + "epoch": 0.3237365624613864, + "grad_norm": 1.1199806793436613, + "learning_rate": 1.9328709880661326e-05, + "loss": 0.3837242126464844, + "step": 655 + }, + { + "epoch": 0.3242308167552206, + "grad_norm": 1.0801449332087112, + "learning_rate": 1.9325763632766916e-05, + "loss": 0.38854193687438965, + "step": 656 + }, + { + "epoch": 0.32472507104905474, + "grad_norm": 1.1330798719469783, + "learning_rate": 1.9322811159125955e-05, + "loss": 0.41792556643486023, + "step": 657 + }, + { + "epoch": 0.32521932534288894, + "grad_norm": 0.9831880252943476, + "learning_rate": 1.931985246170947e-05, + "loss": 0.3968243896961212, + "step": 658 + }, + { + "epoch": 0.3257135796367231, + "grad_norm": 1.0416971268065567, + "learning_rate": 1.9316887542492645e-05, + "loss": 0.41183531284332275, + "step": 659 + }, + { + "epoch": 0.32620783393055724, + "grad_norm": 1.0367106782684, + "learning_rate": 1.931391640345482e-05, + "loss": 0.36057350039482117, + "step": 660 + }, + { + "epoch": 0.32670208822439145, + "grad_norm": 1.0663955736026025, + "learning_rate": 1.9310939046579482e-05, + "loss": 0.36032363772392273, + "step": 661 + }, + { + "epoch": 0.3271963425182256, + "grad_norm": 0.9657326304523917, + "learning_rate": 1.9307955473854275e-05, + "loss": 0.3682931363582611, + "step": 662 + }, + { + "epoch": 0.3276905968120598, + "grad_norm": 1.004896861978755, + "learning_rate": 1.9304965687270987e-05, + "loss": 0.3829198181629181, + "step": 663 + }, + { + "epoch": 0.32818485110589396, + 
"grad_norm": 1.0180253035605964, + "learning_rate": 1.930196968882556e-05, + "loss": 0.3901137709617615, + "step": 664 + }, + { + "epoch": 0.32867910539972817, + "grad_norm": 0.9037607838463562, + "learning_rate": 1.9298967480518077e-05, + "loss": 0.34352344274520874, + "step": 665 + }, + { + "epoch": 0.3291733596935623, + "grad_norm": 0.9918701152773953, + "learning_rate": 1.9295959064352767e-05, + "loss": 0.38822662830352783, + "step": 666 + }, + { + "epoch": 0.3296676139873965, + "grad_norm": 0.9619347095581623, + "learning_rate": 1.9292944442338013e-05, + "loss": 0.3639586567878723, + "step": 667 + }, + { + "epoch": 0.3301618682812307, + "grad_norm": 1.0248410702019595, + "learning_rate": 1.9289923616486326e-05, + "loss": 0.38537997007369995, + "step": 668 + }, + { + "epoch": 0.3306561225750649, + "grad_norm": 0.9469693142742907, + "learning_rate": 1.9286896588814373e-05, + "loss": 0.3514263331890106, + "step": 669 + }, + { + "epoch": 0.33115037686889903, + "grad_norm": 0.9776369401143131, + "learning_rate": 1.928386336134295e-05, + "loss": 0.3873803913593292, + "step": 670 + }, + { + "epoch": 0.33164463116273324, + "grad_norm": 1.0063829461952047, + "learning_rate": 1.9280823936096994e-05, + "loss": 0.36644282937049866, + "step": 671 + }, + { + "epoch": 0.3321388854565674, + "grad_norm": 0.8900960907324665, + "learning_rate": 1.9277778315105587e-05, + "loss": 0.34837427735328674, + "step": 672 + }, + { + "epoch": 0.3326331397504016, + "grad_norm": 1.0946494998655654, + "learning_rate": 1.927472650040194e-05, + "loss": 0.3879021406173706, + "step": 673 + }, + { + "epoch": 0.33312739404423575, + "grad_norm": 1.0256193203663788, + "learning_rate": 1.9271668494023404e-05, + "loss": 0.3753926753997803, + "step": 674 + }, + { + "epoch": 0.33362164833806995, + "grad_norm": 1.1193381317991955, + "learning_rate": 1.9268604298011454e-05, + "loss": 0.35362815856933594, + "step": 675 + }, + { + "epoch": 0.3341159026319041, + "grad_norm": 1.0612190451852097, + 
"learning_rate": 1.926553391441171e-05, + "loss": 0.3685564696788788, + "step": 676 + }, + { + "epoch": 0.3346101569257383, + "grad_norm": 1.2837359031878948, + "learning_rate": 1.926245734527391e-05, + "loss": 0.42326927185058594, + "step": 677 + }, + { + "epoch": 0.33510441121957246, + "grad_norm": 1.0247968871472715, + "learning_rate": 1.925937459265193e-05, + "loss": 0.35918861627578735, + "step": 678 + }, + { + "epoch": 0.33559866551340667, + "grad_norm": 1.1358099673309532, + "learning_rate": 1.9256285658603773e-05, + "loss": 0.38703471422195435, + "step": 679 + }, + { + "epoch": 0.3360929198072408, + "grad_norm": 1.0232813577835114, + "learning_rate": 1.9253190545191567e-05, + "loss": 0.3993009924888611, + "step": 680 + }, + { + "epoch": 0.336587174101075, + "grad_norm": 1.178587285681796, + "learning_rate": 1.9250089254481566e-05, + "loss": 0.3998498320579529, + "step": 681 + }, + { + "epoch": 0.3370814283949092, + "grad_norm": 1.0577657705862298, + "learning_rate": 1.9246981788544145e-05, + "loss": 0.37211501598358154, + "step": 682 + }, + { + "epoch": 0.3375756826887434, + "grad_norm": 1.0126592857393306, + "learning_rate": 1.9243868149453806e-05, + "loss": 0.37204745411872864, + "step": 683 + }, + { + "epoch": 0.33806993698257753, + "grad_norm": 0.9626025917248462, + "learning_rate": 1.924074833928917e-05, + "loss": 0.3784663677215576, + "step": 684 + }, + { + "epoch": 0.33856419127641174, + "grad_norm": 1.0085796667337208, + "learning_rate": 1.9237622360132975e-05, + "loss": 0.4140951633453369, + "step": 685 + }, + { + "epoch": 0.3390584455702459, + "grad_norm": 1.0251059918961796, + "learning_rate": 1.9234490214072083e-05, + "loss": 0.3723721504211426, + "step": 686 + }, + { + "epoch": 0.33955269986408004, + "grad_norm": 1.0704762953012439, + "learning_rate": 1.923135190319747e-05, + "loss": 0.3714251220226288, + "step": 687 + }, + { + "epoch": 0.34004695415791425, + "grad_norm": 2.286186750342226, + "learning_rate": 1.9228207429604224e-05, + "loss": 
0.3551461696624756, + "step": 688 + }, + { + "epoch": 0.3405412084517484, + "grad_norm": 1.0184392375158444, + "learning_rate": 1.9225056795391554e-05, + "loss": 0.3543378412723541, + "step": 689 + }, + { + "epoch": 0.3410354627455826, + "grad_norm": 0.9670805241747071, + "learning_rate": 1.922190000266278e-05, + "loss": 0.3405894935131073, + "step": 690 + }, + { + "epoch": 0.34152971703941676, + "grad_norm": 1.0375943311061684, + "learning_rate": 1.9218737053525324e-05, + "loss": 0.36478808522224426, + "step": 691 + }, + { + "epoch": 0.34202397133325096, + "grad_norm": 1.036881907490894, + "learning_rate": 1.9215567950090734e-05, + "loss": 0.39778709411621094, + "step": 692 + }, + { + "epoch": 0.3425182256270851, + "grad_norm": 0.9719804294561131, + "learning_rate": 1.9212392694474654e-05, + "loss": 0.3553788661956787, + "step": 693 + }, + { + "epoch": 0.3430124799209193, + "grad_norm": 1.0265620111261864, + "learning_rate": 1.920921128879684e-05, + "loss": 0.3393115997314453, + "step": 694 + }, + { + "epoch": 0.3435067342147535, + "grad_norm": 1.2003228723584403, + "learning_rate": 1.9206023735181154e-05, + "loss": 0.4240456819534302, + "step": 695 + }, + { + "epoch": 0.3440009885085877, + "grad_norm": 1.0687040296992496, + "learning_rate": 1.920283003575556e-05, + "loss": 0.3451164960861206, + "step": 696 + }, + { + "epoch": 0.34449524280242183, + "grad_norm": 1.0859108204006387, + "learning_rate": 1.919963019265213e-05, + "loss": 0.4328063726425171, + "step": 697 + }, + { + "epoch": 0.34498949709625604, + "grad_norm": 0.9953984300461581, + "learning_rate": 1.9196424208007026e-05, + "loss": 0.35965877771377563, + "step": 698 + }, + { + "epoch": 0.3454837513900902, + "grad_norm": 1.0276560460371096, + "learning_rate": 1.9193212083960522e-05, + "loss": 0.40995267033576965, + "step": 699 + }, + { + "epoch": 0.3459780056839244, + "grad_norm": 1.047717179086883, + "learning_rate": 1.9189993822656984e-05, + "loss": 0.373586505651474, + "step": 700 + }, + { + "epoch": 
0.34647225997775855, + "grad_norm": 0.967832395747722, + "learning_rate": 1.918676942624488e-05, + "loss": 0.3651657998561859, + "step": 701 + }, + { + "epoch": 0.34696651427159275, + "grad_norm": 0.9154206667420104, + "learning_rate": 1.918353889687677e-05, + "loss": 0.3333090543746948, + "step": 702 + }, + { + "epoch": 0.3474607685654269, + "grad_norm": 1.109347895406641, + "learning_rate": 1.9180302236709312e-05, + "loss": 0.444000780582428, + "step": 703 + }, + { + "epoch": 0.3479550228592611, + "grad_norm": 0.9543494832625998, + "learning_rate": 1.917705944790325e-05, + "loss": 0.34942537546157837, + "step": 704 + }, + { + "epoch": 0.34844927715309526, + "grad_norm": 1.206317081042567, + "learning_rate": 1.9173810532623425e-05, + "loss": 0.4709789752960205, + "step": 705 + }, + { + "epoch": 0.34894353144692947, + "grad_norm": 1.0126287373930702, + "learning_rate": 1.917055549303877e-05, + "loss": 0.3615723252296448, + "step": 706 + }, + { + "epoch": 0.3494377857407636, + "grad_norm": 0.98553805717422, + "learning_rate": 1.9167294331322293e-05, + "loss": 0.366035133600235, + "step": 707 + }, + { + "epoch": 0.3499320400345978, + "grad_norm": 1.085095649211616, + "learning_rate": 1.9164027049651105e-05, + "loss": 0.3916548490524292, + "step": 708 + }, + { + "epoch": 0.350426294328432, + "grad_norm": 1.0423550617328055, + "learning_rate": 1.91607536502064e-05, + "loss": 0.3752925992012024, + "step": 709 + }, + { + "epoch": 0.3509205486222662, + "grad_norm": 1.0859051595052658, + "learning_rate": 1.9157474135173448e-05, + "loss": 0.3471261262893677, + "step": 710 + }, + { + "epoch": 0.35141480291610033, + "grad_norm": 1.0011428490015388, + "learning_rate": 1.9154188506741605e-05, + "loss": 0.36898115277290344, + "step": 711 + }, + { + "epoch": 0.3519090572099345, + "grad_norm": 1.0150877470647623, + "learning_rate": 1.9150896767104315e-05, + "loss": 0.38236287236213684, + "step": 712 + }, + { + "epoch": 0.3524033115037687, + "grad_norm": 1.0813644645593066, + 
"learning_rate": 1.9147598918459096e-05, + "loss": 0.39260241389274597, + "step": 713 + }, + { + "epoch": 0.35289756579760284, + "grad_norm": 0.988095993083205, + "learning_rate": 1.9144294963007542e-05, + "loss": 0.3699083626270294, + "step": 714 + }, + { + "epoch": 0.35339182009143705, + "grad_norm": 0.9649609380548236, + "learning_rate": 1.914098490295532e-05, + "loss": 0.37720543146133423, + "step": 715 + }, + { + "epoch": 0.3538860743852712, + "grad_norm": 0.8834082509396699, + "learning_rate": 1.9137668740512195e-05, + "loss": 0.298441082239151, + "step": 716 + }, + { + "epoch": 0.3543803286791054, + "grad_norm": 0.996165149875045, + "learning_rate": 1.913434647789197e-05, + "loss": 0.3867550194263458, + "step": 717 + }, + { + "epoch": 0.35487458297293956, + "grad_norm": 0.9824732772890364, + "learning_rate": 1.913101811731256e-05, + "loss": 0.37111300230026245, + "step": 718 + }, + { + "epoch": 0.35536883726677376, + "grad_norm": 0.9874274570055057, + "learning_rate": 1.9127683660995916e-05, + "loss": 0.3922812342643738, + "step": 719 + }, + { + "epoch": 0.3558630915606079, + "grad_norm": 1.0744489462576237, + "learning_rate": 1.9124343111168077e-05, + "loss": 0.3878915309906006, + "step": 720 + }, + { + "epoch": 0.3563573458544421, + "grad_norm": 0.9551023310729483, + "learning_rate": 1.9120996470059153e-05, + "loss": 0.34974879026412964, + "step": 721 + }, + { + "epoch": 0.35685160014827627, + "grad_norm": 1.0403250728390605, + "learning_rate": 1.9117643739903306e-05, + "loss": 0.38341426849365234, + "step": 722 + }, + { + "epoch": 0.3573458544421105, + "grad_norm": 0.9876921724558848, + "learning_rate": 1.9114284922938772e-05, + "loss": 0.32610252499580383, + "step": 723 + }, + { + "epoch": 0.35784010873594463, + "grad_norm": 1.0486464385186933, + "learning_rate": 1.9110920021407855e-05, + "loss": 0.37203550338745117, + "step": 724 + }, + { + "epoch": 0.35833436302977884, + "grad_norm": 1.0809240289061282, + "learning_rate": 1.9107549037556906e-05, + 
"loss": 0.2954786419868469, + "step": 725 + }, + { + "epoch": 0.358828617323613, + "grad_norm": 0.9795897601711951, + "learning_rate": 1.9104171973636353e-05, + "loss": 0.33074450492858887, + "step": 726 + }, + { + "epoch": 0.3593228716174472, + "grad_norm": 1.0341587070514209, + "learning_rate": 1.9100788831900676e-05, + "loss": 0.350687712430954, + "step": 727 + }, + { + "epoch": 0.35981712591128134, + "grad_norm": 1.143909518582956, + "learning_rate": 1.9097399614608406e-05, + "loss": 0.3635619878768921, + "step": 728 + }, + { + "epoch": 0.36031138020511555, + "grad_norm": 1.0607740871884148, + "learning_rate": 1.909400432402214e-05, + "loss": 0.36409544944763184, + "step": 729 + }, + { + "epoch": 0.3608056344989497, + "grad_norm": 1.069313873032721, + "learning_rate": 1.9090602962408523e-05, + "loss": 0.4109501540660858, + "step": 730 + }, + { + "epoch": 0.3612998887927839, + "grad_norm": 1.0147750628685799, + "learning_rate": 1.908719553203826e-05, + "loss": 0.337943971157074, + "step": 731 + }, + { + "epoch": 0.36179414308661806, + "grad_norm": 1.0957860180414656, + "learning_rate": 1.9083782035186097e-05, + "loss": 0.36411553621292114, + "step": 732 + }, + { + "epoch": 0.36228839738045227, + "grad_norm": 1.1570738944902594, + "learning_rate": 1.908036247413084e-05, + "loss": 0.3513786494731903, + "step": 733 + }, + { + "epoch": 0.3627826516742864, + "grad_norm": 1.156885907892102, + "learning_rate": 1.907693685115534e-05, + "loss": 0.4017047584056854, + "step": 734 + }, + { + "epoch": 0.3632769059681206, + "grad_norm": 1.0932284273900412, + "learning_rate": 1.907350516854649e-05, + "loss": 0.3780835270881653, + "step": 735 + }, + { + "epoch": 0.3637711602619548, + "grad_norm": 1.10688269569213, + "learning_rate": 1.9070067428595234e-05, + "loss": 0.35562777519226074, + "step": 736 + }, + { + "epoch": 0.364265414555789, + "grad_norm": 1.0784034928358046, + "learning_rate": 1.9066623633596556e-05, + "loss": 0.34880492091178894, + "step": 737 + }, + { + 
"epoch": 0.36475966884962313, + "grad_norm": 1.1213824671894879, + "learning_rate": 1.9063173785849488e-05, + "loss": 0.3798677921295166, + "step": 738 + }, + { + "epoch": 0.3652539231434573, + "grad_norm": 1.0300538330170659, + "learning_rate": 1.9059717887657098e-05, + "loss": 0.371119886636734, + "step": 739 + }, + { + "epoch": 0.3657481774372915, + "grad_norm": 1.075537593372937, + "learning_rate": 1.9056255941326497e-05, + "loss": 0.3845891058444977, + "step": 740 + }, + { + "epoch": 0.36624243173112564, + "grad_norm": 1.0460904589757556, + "learning_rate": 1.9052787949168823e-05, + "loss": 0.34627166390419006, + "step": 741 + }, + { + "epoch": 0.36673668602495985, + "grad_norm": 1.0588032623720978, + "learning_rate": 1.9049313913499266e-05, + "loss": 0.3872081935405731, + "step": 742 + }, + { + "epoch": 0.367230940318794, + "grad_norm": 1.0173727289332204, + "learning_rate": 1.9045833836637038e-05, + "loss": 0.40446269512176514, + "step": 743 + }, + { + "epoch": 0.3677251946126282, + "grad_norm": 0.9672045860873493, + "learning_rate": 1.904234772090539e-05, + "loss": 0.3421085476875305, + "step": 744 + }, + { + "epoch": 0.36821944890646235, + "grad_norm": 0.9886363928023795, + "learning_rate": 1.90388555686316e-05, + "loss": 0.3626730442047119, + "step": 745 + }, + { + "epoch": 0.36871370320029656, + "grad_norm": 0.9308335236520315, + "learning_rate": 1.9035357382146984e-05, + "loss": 0.338506281375885, + "step": 746 + }, + { + "epoch": 0.3692079574941307, + "grad_norm": 1.010277605498289, + "learning_rate": 1.903185316378688e-05, + "loss": 0.3709959089756012, + "step": 747 + }, + { + "epoch": 0.3697022117879649, + "grad_norm": 1.0369282663858728, + "learning_rate": 1.9028342915890655e-05, + "loss": 0.3804059624671936, + "step": 748 + }, + { + "epoch": 0.37019646608179907, + "grad_norm": 1.0305613800678137, + "learning_rate": 1.9024826640801694e-05, + "loss": 0.3416539132595062, + "step": 749 + }, + { + "epoch": 0.3706907203756333, + "grad_norm": 
1.0119233680399335, + "learning_rate": 1.9021304340867418e-05, + "loss": 0.3642072081565857, + "step": 750 + }, + { + "epoch": 0.3711849746694674, + "grad_norm": 0.9749783281253589, + "learning_rate": 1.9017776018439267e-05, + "loss": 0.35957199335098267, + "step": 751 + }, + { + "epoch": 0.37167922896330163, + "grad_norm": 1.1539382067501942, + "learning_rate": 1.9014241675872692e-05, + "loss": 0.38497287034988403, + "step": 752 + }, + { + "epoch": 0.3721734832571358, + "grad_norm": 1.1731793747690833, + "learning_rate": 1.9010701315527173e-05, + "loss": 0.40713614225387573, + "step": 753 + }, + { + "epoch": 0.37266773755097, + "grad_norm": 1.0417857344342851, + "learning_rate": 1.9007154939766196e-05, + "loss": 0.35115551948547363, + "step": 754 + }, + { + "epoch": 0.37316199184480414, + "grad_norm": 0.958988647508799, + "learning_rate": 1.9003602550957284e-05, + "loss": 0.3478096127510071, + "step": 755 + }, + { + "epoch": 0.37365624613863835, + "grad_norm": 1.040896998789985, + "learning_rate": 1.9000044151471956e-05, + "loss": 0.36460641026496887, + "step": 756 + }, + { + "epoch": 0.3741505004324725, + "grad_norm": 1.1161707385765272, + "learning_rate": 1.8996479743685745e-05, + "loss": 0.38015758991241455, + "step": 757 + }, + { + "epoch": 0.3746447547263067, + "grad_norm": 1.1039269634713542, + "learning_rate": 1.8992909329978202e-05, + "loss": 0.35270214080810547, + "step": 758 + }, + { + "epoch": 0.37513900902014086, + "grad_norm": 1.0025131869881447, + "learning_rate": 1.8989332912732884e-05, + "loss": 0.3875473439693451, + "step": 759 + }, + { + "epoch": 0.37563326331397506, + "grad_norm": 1.0209812095079043, + "learning_rate": 1.8985750494337353e-05, + "loss": 0.3281819820404053, + "step": 760 + }, + { + "epoch": 0.3761275176078092, + "grad_norm": 1.2490133288735825, + "learning_rate": 1.8982162077183182e-05, + "loss": 0.4081311821937561, + "step": 761 + }, + { + "epoch": 0.3766217719016434, + "grad_norm": 1.2134865751354402, + "learning_rate": 
1.897856766366595e-05, + "loss": 0.3546852469444275, + "step": 762 + }, + { + "epoch": 0.37711602619547757, + "grad_norm": 0.9620958606777789, + "learning_rate": 1.8974967256185234e-05, + "loss": 0.3177235424518585, + "step": 763 + }, + { + "epoch": 0.3776102804893118, + "grad_norm": 1.0401218813843935, + "learning_rate": 1.8971360857144616e-05, + "loss": 0.3739625811576843, + "step": 764 + }, + { + "epoch": 0.37810453478314593, + "grad_norm": 0.9714277368627854, + "learning_rate": 1.8967748468951673e-05, + "loss": 0.32039010524749756, + "step": 765 + }, + { + "epoch": 0.3785987890769801, + "grad_norm": 1.0178844258047104, + "learning_rate": 1.8964130094017986e-05, + "loss": 0.3237234354019165, + "step": 766 + }, + { + "epoch": 0.3790930433708143, + "grad_norm": 1.0589536664735313, + "learning_rate": 1.896050573475913e-05, + "loss": 0.33864307403564453, + "step": 767 + }, + { + "epoch": 0.37958729766464844, + "grad_norm": 1.076259010215984, + "learning_rate": 1.8956875393594675e-05, + "loss": 0.40412086248397827, + "step": 768 + }, + { + "epoch": 0.38008155195848264, + "grad_norm": 1.049114130745209, + "learning_rate": 1.8953239072948185e-05, + "loss": 0.37689530849456787, + "step": 769 + }, + { + "epoch": 0.3805758062523168, + "grad_norm": 1.1429748380406861, + "learning_rate": 1.8949596775247215e-05, + "loss": 0.3632664680480957, + "step": 770 + }, + { + "epoch": 0.381070060546151, + "grad_norm": 1.0707340379824546, + "learning_rate": 1.8945948502923314e-05, + "loss": 0.384027361869812, + "step": 771 + }, + { + "epoch": 0.38156431483998515, + "grad_norm": 1.0884709757767692, + "learning_rate": 1.8942294258412012e-05, + "loss": 0.37623292207717896, + "step": 772 + }, + { + "epoch": 0.38205856913381936, + "grad_norm": 0.9918916696644151, + "learning_rate": 1.8938634044152837e-05, + "loss": 0.3449557423591614, + "step": 773 + }, + { + "epoch": 0.3825528234276535, + "grad_norm": 1.0216495444427651, + "learning_rate": 1.8934967862589287e-05, + "loss": 
0.37977170944213867, + "step": 774 + }, + { + "epoch": 0.3830470777214877, + "grad_norm": 1.035626875821766, + "learning_rate": 1.893129571616886e-05, + "loss": 0.3535463809967041, + "step": 775 + }, + { + "epoch": 0.38354133201532187, + "grad_norm": 0.9784961361645077, + "learning_rate": 1.8927617607343024e-05, + "loss": 0.3107556104660034, + "step": 776 + }, + { + "epoch": 0.3840355863091561, + "grad_norm": 0.9647734455274504, + "learning_rate": 1.8923933538567238e-05, + "loss": 0.33028605580329895, + "step": 777 + }, + { + "epoch": 0.3845298406029902, + "grad_norm": 1.0880250729774004, + "learning_rate": 1.8920243512300925e-05, + "loss": 0.35947421193122864, + "step": 778 + }, + { + "epoch": 0.38502409489682443, + "grad_norm": 1.1225656593555045, + "learning_rate": 1.89165475310075e-05, + "loss": 0.36262935400009155, + "step": 779 + }, + { + "epoch": 0.3855183491906586, + "grad_norm": 0.9595574558826961, + "learning_rate": 1.8912845597154344e-05, + "loss": 0.3441828489303589, + "step": 780 + }, + { + "epoch": 0.3860126034844928, + "grad_norm": 1.1060761912194574, + "learning_rate": 1.8909137713212813e-05, + "loss": 0.3748928904533386, + "step": 781 + }, + { + "epoch": 0.38650685777832694, + "grad_norm": 1.0401989681427097, + "learning_rate": 1.8905423881658248e-05, + "loss": 0.3571966588497162, + "step": 782 + }, + { + "epoch": 0.38700111207216115, + "grad_norm": 1.0661600684644588, + "learning_rate": 1.8901704104969937e-05, + "loss": 0.3937920331954956, + "step": 783 + }, + { + "epoch": 0.3874953663659953, + "grad_norm": 1.036207969764135, + "learning_rate": 1.8897978385631157e-05, + "loss": 0.3641708493232727, + "step": 784 + }, + { + "epoch": 0.3879896206598295, + "grad_norm": 1.0259735566777997, + "learning_rate": 1.8894246726129143e-05, + "loss": 0.33510833978652954, + "step": 785 + }, + { + "epoch": 0.38848387495366365, + "grad_norm": 1.0496886995032506, + "learning_rate": 1.88905091289551e-05, + "loss": 0.3553236722946167, + "step": 786 + }, + { + 
"epoch": 0.38897812924749786, + "grad_norm": 1.1065055000350301, + "learning_rate": 1.8886765596604188e-05, + "loss": 0.3802195191383362, + "step": 787 + }, + { + "epoch": 0.389472383541332, + "grad_norm": 1.0233155379560877, + "learning_rate": 1.8883016131575546e-05, + "loss": 0.3672805726528168, + "step": 788 + }, + { + "epoch": 0.3899666378351662, + "grad_norm": 1.1021600101810725, + "learning_rate": 1.887926073637225e-05, + "loss": 0.35715609788894653, + "step": 789 + }, + { + "epoch": 0.39046089212900037, + "grad_norm": 1.0669470229074853, + "learning_rate": 1.8875499413501362e-05, + "loss": 0.3800659775733948, + "step": 790 + }, + { + "epoch": 0.3909551464228345, + "grad_norm": 1.0110531011706714, + "learning_rate": 1.8871732165473878e-05, + "loss": 0.36886462569236755, + "step": 791 + }, + { + "epoch": 0.3914494007166687, + "grad_norm": 1.1716485087298352, + "learning_rate": 1.886795899480476e-05, + "loss": 0.37373536825180054, + "step": 792 + }, + { + "epoch": 0.3919436550105029, + "grad_norm": 1.1804493539453536, + "learning_rate": 1.8864179904012932e-05, + "loss": 0.4016551375389099, + "step": 793 + }, + { + "epoch": 0.3924379093043371, + "grad_norm": 1.3227573763511704, + "learning_rate": 1.886039489562125e-05, + "loss": 0.35107535123825073, + "step": 794 + }, + { + "epoch": 0.39293216359817124, + "grad_norm": 1.0690764214154878, + "learning_rate": 1.8856603972156532e-05, + "loss": 0.36280331015586853, + "step": 795 + }, + { + "epoch": 0.39342641789200544, + "grad_norm": 1.0372650355149657, + "learning_rate": 1.885280713614955e-05, + "loss": 0.3417884111404419, + "step": 796 + }, + { + "epoch": 0.3939206721858396, + "grad_norm": 1.0135638633522712, + "learning_rate": 1.8849004390135017e-05, + "loss": 0.3257544934749603, + "step": 797 + }, + { + "epoch": 0.3944149264796738, + "grad_norm": 1.138312578356034, + "learning_rate": 1.8845195736651588e-05, + "loss": 0.3694860339164734, + "step": 798 + }, + { + "epoch": 0.39490918077350795, + "grad_norm": 
1.0432466517484986, + "learning_rate": 1.8841381178241865e-05, + "loss": 0.37279266119003296, + "step": 799 + }, + { + "epoch": 0.39540343506734216, + "grad_norm": 1.023281980764518, + "learning_rate": 1.88375607174524e-05, + "loss": 0.38758352398872375, + "step": 800 + }, + { + "epoch": 0.3958976893611763, + "grad_norm": 1.0321652923702807, + "learning_rate": 1.883373435683367e-05, + "loss": 0.34098950028419495, + "step": 801 + }, + { + "epoch": 0.3963919436550105, + "grad_norm": 1.0256865325574602, + "learning_rate": 1.8829902098940105e-05, + "loss": 0.3278653621673584, + "step": 802 + }, + { + "epoch": 0.39688619794884467, + "grad_norm": 1.1042531688452888, + "learning_rate": 1.8826063946330065e-05, + "loss": 0.3673133850097656, + "step": 803 + }, + { + "epoch": 0.39738045224267887, + "grad_norm": 0.9510108180701087, + "learning_rate": 1.882221990156584e-05, + "loss": 0.37917453050613403, + "step": 804 + }, + { + "epoch": 0.397874706536513, + "grad_norm": 0.9926574292369763, + "learning_rate": 1.8818369967213662e-05, + "loss": 0.33986327052116394, + "step": 805 + }, + { + "epoch": 0.39836896083034723, + "grad_norm": 1.0256369099360807, + "learning_rate": 1.8814514145843694e-05, + "loss": 0.34402647614479065, + "step": 806 + }, + { + "epoch": 0.3988632151241814, + "grad_norm": 1.0984836868071073, + "learning_rate": 1.8810652440030026e-05, + "loss": 0.32781803607940674, + "step": 807 + }, + { + "epoch": 0.3993574694180156, + "grad_norm": 1.063630501097469, + "learning_rate": 1.8806784852350678e-05, + "loss": 0.35807961225509644, + "step": 808 + }, + { + "epoch": 0.39985172371184974, + "grad_norm": 1.0130076092125457, + "learning_rate": 1.8802911385387596e-05, + "loss": 0.33577096462249756, + "step": 809 + }, + { + "epoch": 0.40034597800568394, + "grad_norm": 1.205635135602797, + "learning_rate": 1.8799032041726654e-05, + "loss": 0.37786391377449036, + "step": 810 + }, + { + "epoch": 0.4008402322995181, + "grad_norm": 1.0055899694647235, + "learning_rate": 
1.879514682395764e-05, + "loss": 0.3237725496292114, + "step": 811 + }, + { + "epoch": 0.4013344865933523, + "grad_norm": 1.0557641796624602, + "learning_rate": 1.8791255734674275e-05, + "loss": 0.29552844166755676, + "step": 812 + }, + { + "epoch": 0.40182874088718645, + "grad_norm": 1.0675222800328668, + "learning_rate": 1.8787358776474192e-05, + "loss": 0.40317612886428833, + "step": 813 + }, + { + "epoch": 0.40232299518102066, + "grad_norm": 1.0548023053217102, + "learning_rate": 1.8783455951958948e-05, + "loss": 0.33383694291114807, + "step": 814 + }, + { + "epoch": 0.4028172494748548, + "grad_norm": 1.0255061005640398, + "learning_rate": 1.8779547263734012e-05, + "loss": 0.35020262002944946, + "step": 815 + }, + { + "epoch": 0.403311503768689, + "grad_norm": 1.098709822155027, + "learning_rate": 1.8775632714408765e-05, + "loss": 0.3742774724960327, + "step": 816 + }, + { + "epoch": 0.40380575806252317, + "grad_norm": 0.9986084839363315, + "learning_rate": 1.8771712306596506e-05, + "loss": 0.35037580132484436, + "step": 817 + }, + { + "epoch": 0.4043000123563573, + "grad_norm": 1.078218018297503, + "learning_rate": 1.8767786042914445e-05, + "loss": 0.3416820168495178, + "step": 818 + }, + { + "epoch": 0.4047942666501915, + "grad_norm": 1.0398523365943921, + "learning_rate": 1.8763853925983695e-05, + "loss": 0.33287927508354187, + "step": 819 + }, + { + "epoch": 0.4052885209440257, + "grad_norm": 1.031774367057856, + "learning_rate": 1.875991595842929e-05, + "loss": 0.3493141531944275, + "step": 820 + }, + { + "epoch": 0.4057827752378599, + "grad_norm": 1.1647269737420223, + "learning_rate": 1.875597214288015e-05, + "loss": 0.4184780418872833, + "step": 821 + }, + { + "epoch": 0.40627702953169403, + "grad_norm": 1.0098974718957208, + "learning_rate": 1.8752022481969116e-05, + "loss": 0.33189794421195984, + "step": 822 + }, + { + "epoch": 0.40677128382552824, + "grad_norm": 1.1012026040533913, + "learning_rate": 1.8748066978332925e-05, + "loss": 
0.35339856147766113, + "step": 823 + }, + { + "epoch": 0.4072655381193624, + "grad_norm": 0.990995886573267, + "learning_rate": 1.874410563461221e-05, + "loss": 0.3766328692436218, + "step": 824 + }, + { + "epoch": 0.4077597924131966, + "grad_norm": 1.023451056136873, + "learning_rate": 1.874013845345152e-05, + "loss": 0.32575076818466187, + "step": 825 + }, + { + "epoch": 0.40825404670703075, + "grad_norm": 0.9933822197860499, + "learning_rate": 1.8736165437499273e-05, + "loss": 0.3417864441871643, + "step": 826 + }, + { + "epoch": 0.40874830100086496, + "grad_norm": 1.053854919420327, + "learning_rate": 1.8732186589407807e-05, + "loss": 0.3636544942855835, + "step": 827 + }, + { + "epoch": 0.4092425552946991, + "grad_norm": 1.0398605740994966, + "learning_rate": 1.872820191183334e-05, + "loss": 0.38730406761169434, + "step": 828 + }, + { + "epoch": 0.4097368095885333, + "grad_norm": 1.031894160648423, + "learning_rate": 1.872421140743599e-05, + "loss": 0.3593043088912964, + "step": 829 + }, + { + "epoch": 0.41023106388236746, + "grad_norm": 1.046860972263581, + "learning_rate": 1.872021507887976e-05, + "loss": 0.39092978835105896, + "step": 830 + }, + { + "epoch": 0.41072531817620167, + "grad_norm": 1.1607362555786684, + "learning_rate": 1.8716212928832537e-05, + "loss": 0.3745616674423218, + "step": 831 + }, + { + "epoch": 0.4112195724700358, + "grad_norm": 1.1451994826740608, + "learning_rate": 1.87122049599661e-05, + "loss": 0.39571845531463623, + "step": 832 + }, + { + "epoch": 0.41171382676387, + "grad_norm": 1.0987542615004384, + "learning_rate": 1.8708191174956116e-05, + "loss": 0.35459476709365845, + "step": 833 + }, + { + "epoch": 0.4122080810577042, + "grad_norm": 1.1159636372579822, + "learning_rate": 1.870417157648213e-05, + "loss": 0.38937896490097046, + "step": 834 + }, + { + "epoch": 0.4127023353515384, + "grad_norm": 1.002441779942121, + "learning_rate": 1.8700146167227563e-05, + "loss": 0.33595120906829834, + "step": 835 + }, + { + "epoch": 
0.41319658964537254, + "grad_norm": 0.9899088387295479, + "learning_rate": 1.869611494987973e-05, + "loss": 0.332889199256897, + "step": 836 + }, + { + "epoch": 0.41369084393920674, + "grad_norm": 1.0005984941908395, + "learning_rate": 1.8692077927129803e-05, + "loss": 0.333438515663147, + "step": 837 + }, + { + "epoch": 0.4141850982330409, + "grad_norm": 0.9672990037342486, + "learning_rate": 1.868803510167285e-05, + "loss": 0.30645743012428284, + "step": 838 + }, + { + "epoch": 0.4146793525268751, + "grad_norm": 1.0166404987540014, + "learning_rate": 1.86839864762078e-05, + "loss": 0.3333967924118042, + "step": 839 + }, + { + "epoch": 0.41517360682070925, + "grad_norm": 1.1324675944020866, + "learning_rate": 1.867993205343746e-05, + "loss": 0.36230576038360596, + "step": 840 + }, + { + "epoch": 0.41566786111454346, + "grad_norm": 1.4565152055506116, + "learning_rate": 1.8675871836068498e-05, + "loss": 0.34191709756851196, + "step": 841 + }, + { + "epoch": 0.4161621154083776, + "grad_norm": 1.1876819294674656, + "learning_rate": 1.8671805826811462e-05, + "loss": 0.3115188479423523, + "step": 842 + }, + { + "epoch": 0.4166563697022118, + "grad_norm": 1.023080563524472, + "learning_rate": 1.866773402838076e-05, + "loss": 0.3725768029689789, + "step": 843 + }, + { + "epoch": 0.41715062399604597, + "grad_norm": 1.1051799194693688, + "learning_rate": 1.8663656443494673e-05, + "loss": 0.376983642578125, + "step": 844 + }, + { + "epoch": 0.4176448782898801, + "grad_norm": 1.0101343157113072, + "learning_rate": 1.8659573074875327e-05, + "loss": 0.31490784883499146, + "step": 845 + }, + { + "epoch": 0.4181391325837143, + "grad_norm": 1.0250002510666845, + "learning_rate": 1.8655483925248727e-05, + "loss": 0.3533504605293274, + "step": 846 + }, + { + "epoch": 0.4186333868775485, + "grad_norm": 1.090746715781531, + "learning_rate": 1.8651388997344734e-05, + "loss": 0.3282274305820465, + "step": 847 + }, + { + "epoch": 0.4191276411713827, + "grad_norm": 1.1145704933282803, + 
"learning_rate": 1.8647288293897055e-05, + "loss": 0.32892414927482605, + "step": 848 + }, + { + "epoch": 0.41962189546521683, + "grad_norm": 1.1451436882679205, + "learning_rate": 1.864318181764327e-05, + "loss": 0.40414246916770935, + "step": 849 + }, + { + "epoch": 0.42011614975905104, + "grad_norm": 0.9874933781402742, + "learning_rate": 1.8639069571324798e-05, + "loss": 0.30335378646850586, + "step": 850 + }, + { + "epoch": 0.4206104040528852, + "grad_norm": 1.0390790492756226, + "learning_rate": 1.863495155768692e-05, + "loss": 0.311710000038147, + "step": 851 + }, + { + "epoch": 0.4211046583467194, + "grad_norm": 1.1685121542837038, + "learning_rate": 1.8630827779478755e-05, + "loss": 0.37345218658447266, + "step": 852 + }, + { + "epoch": 0.42159891264055355, + "grad_norm": 1.118375459884757, + "learning_rate": 1.8626698239453287e-05, + "loss": 0.37286317348480225, + "step": 853 + }, + { + "epoch": 0.42209316693438775, + "grad_norm": 1.061435107804804, + "learning_rate": 1.8622562940367335e-05, + "loss": 0.3706691861152649, + "step": 854 + }, + { + "epoch": 0.4225874212282219, + "grad_norm": 1.045639661440086, + "learning_rate": 1.8618421884981567e-05, + "loss": 0.30183354020118713, + "step": 855 + }, + { + "epoch": 0.4230816755220561, + "grad_norm": 0.9282918926966607, + "learning_rate": 1.8614275076060486e-05, + "loss": 0.32329827547073364, + "step": 856 + }, + { + "epoch": 0.42357592981589026, + "grad_norm": 0.9823332197669685, + "learning_rate": 1.861012251637245e-05, + "loss": 0.39380010962486267, + "step": 857 + }, + { + "epoch": 0.42407018410972447, + "grad_norm": 1.2258684110272524, + "learning_rate": 1.8605964208689646e-05, + "loss": 0.41745316982269287, + "step": 858 + }, + { + "epoch": 0.4245644384035586, + "grad_norm": 1.0539643629085786, + "learning_rate": 1.86018001557881e-05, + "loss": 0.36751389503479004, + "step": 859 + }, + { + "epoch": 0.4250586926973928, + "grad_norm": 1.052378043397748, + "learning_rate": 1.8597630360447673e-05, + 
"loss": 0.36876100301742554, + "step": 860 + }, + { + "epoch": 0.425552946991227, + "grad_norm": 1.0649813734142937, + "learning_rate": 1.8593454825452067e-05, + "loss": 0.3473365306854248, + "step": 861 + }, + { + "epoch": 0.4260472012850612, + "grad_norm": 1.0186749062796028, + "learning_rate": 1.8589273553588802e-05, + "loss": 0.3429828882217407, + "step": 862 + }, + { + "epoch": 0.42654145557889533, + "grad_norm": 0.9471164855143414, + "learning_rate": 1.8585086547649238e-05, + "loss": 0.3424219787120819, + "step": 863 + }, + { + "epoch": 0.42703570987272954, + "grad_norm": 1.002345729786534, + "learning_rate": 1.8580893810428562e-05, + "loss": 0.32187891006469727, + "step": 864 + }, + { + "epoch": 0.4275299641665637, + "grad_norm": 0.997893238522563, + "learning_rate": 1.8576695344725785e-05, + "loss": 0.3116072416305542, + "step": 865 + }, + { + "epoch": 0.4280242184603979, + "grad_norm": 0.9198063604105835, + "learning_rate": 1.8572491153343742e-05, + "loss": 0.32645124197006226, + "step": 866 + }, + { + "epoch": 0.42851847275423205, + "grad_norm": 1.0827892730720303, + "learning_rate": 1.8568281239089088e-05, + "loss": 0.36861616373062134, + "step": 867 + }, + { + "epoch": 0.42901272704806626, + "grad_norm": 1.05561333743087, + "learning_rate": 1.8564065604772307e-05, + "loss": 0.38477885723114014, + "step": 868 + }, + { + "epoch": 0.4295069813419004, + "grad_norm": 1.1711610330815532, + "learning_rate": 1.8559844253207694e-05, + "loss": 0.352588951587677, + "step": 869 + }, + { + "epoch": 0.43000123563573456, + "grad_norm": 1.1459489566657088, + "learning_rate": 1.8555617187213362e-05, + "loss": 0.43443864583969116, + "step": 870 + }, + { + "epoch": 0.43049548992956876, + "grad_norm": 1.1608032541581428, + "learning_rate": 1.8551384409611238e-05, + "loss": 0.37355685234069824, + "step": 871 + }, + { + "epoch": 0.4309897442234029, + "grad_norm": 1.120838755410591, + "learning_rate": 1.854714592322707e-05, + "loss": 0.3529026508331299, + "step": 872 + }, + { 
+ "epoch": 0.4314839985172371, + "grad_norm": 1.031744932760461, + "learning_rate": 1.854290173089041e-05, + "loss": 0.3278823494911194, + "step": 873 + }, + { + "epoch": 0.4319782528110713, + "grad_norm": 1.045846838310407, + "learning_rate": 1.8538651835434615e-05, + "loss": 0.3677588999271393, + "step": 874 + }, + { + "epoch": 0.4324725071049055, + "grad_norm": 0.9726822011565114, + "learning_rate": 1.8534396239696852e-05, + "loss": 0.34132176637649536, + "step": 875 + }, + { + "epoch": 0.43296676139873963, + "grad_norm": 0.967842291132869, + "learning_rate": 1.8530134946518106e-05, + "loss": 0.3329963684082031, + "step": 876 + }, + { + "epoch": 0.43346101569257384, + "grad_norm": 1.1447169522915757, + "learning_rate": 1.852586795874315e-05, + "loss": 0.38435080647468567, + "step": 877 + }, + { + "epoch": 0.433955269986408, + "grad_norm": 1.076068410050275, + "learning_rate": 1.8521595279220564e-05, + "loss": 0.3737541735172272, + "step": 878 + }, + { + "epoch": 0.4344495242802422, + "grad_norm": 1.0947429210573731, + "learning_rate": 1.851731691080273e-05, + "loss": 0.3676382303237915, + "step": 879 + }, + { + "epoch": 0.43494377857407635, + "grad_norm": 0.9624268111771948, + "learning_rate": 1.8513032856345825e-05, + "loss": 0.317960262298584, + "step": 880 + }, + { + "epoch": 0.43543803286791055, + "grad_norm": 1.040958800557315, + "learning_rate": 1.8508743118709816e-05, + "loss": 0.38857966661453247, + "step": 881 + }, + { + "epoch": 0.4359322871617447, + "grad_norm": 1.0694529449199925, + "learning_rate": 1.8504447700758482e-05, + "loss": 0.33234506845474243, + "step": 882 + }, + { + "epoch": 0.4364265414555789, + "grad_norm": 1.0262098516685678, + "learning_rate": 1.8500146605359375e-05, + "loss": 0.3380611538887024, + "step": 883 + }, + { + "epoch": 0.43692079574941306, + "grad_norm": 1.032922511494617, + "learning_rate": 1.8495839835383845e-05, + "loss": 0.36386823654174805, + "step": 884 + }, + { + "epoch": 0.43741505004324727, + "grad_norm": 
1.0814661245803954, + "learning_rate": 1.849152739370703e-05, + "loss": 0.34711897373199463, + "step": 885 + }, + { + "epoch": 0.4379093043370814, + "grad_norm": 1.1112439466083954, + "learning_rate": 1.848720928320786e-05, + "loss": 0.3861457109451294, + "step": 886 + }, + { + "epoch": 0.4384035586309156, + "grad_norm": 1.0062524071684966, + "learning_rate": 1.848288550676904e-05, + "loss": 0.3387115001678467, + "step": 887 + }, + { + "epoch": 0.4388978129247498, + "grad_norm": 1.119801920916648, + "learning_rate": 1.847855606727706e-05, + "loss": 0.3419748842716217, + "step": 888 + }, + { + "epoch": 0.439392067218584, + "grad_norm": 1.1162084355940824, + "learning_rate": 1.847422096762219e-05, + "loss": 0.38184499740600586, + "step": 889 + }, + { + "epoch": 0.43988632151241813, + "grad_norm": 1.1974191241625343, + "learning_rate": 1.846988021069849e-05, + "loss": 0.3845345973968506, + "step": 890 + }, + { + "epoch": 0.44038057580625234, + "grad_norm": 1.035257767207683, + "learning_rate": 1.8465533799403778e-05, + "loss": 0.31854647397994995, + "step": 891 + }, + { + "epoch": 0.4408748301000865, + "grad_norm": 1.2150547461116588, + "learning_rate": 1.8461181736639658e-05, + "loss": 0.3940027356147766, + "step": 892 + }, + { + "epoch": 0.4413690843939207, + "grad_norm": 1.0827124100419134, + "learning_rate": 1.8456824025311508e-05, + "loss": 0.3580612540245056, + "step": 893 + }, + { + "epoch": 0.44186333868775485, + "grad_norm": 1.0457692243819372, + "learning_rate": 1.8452460668328474e-05, + "loss": 0.3662642240524292, + "step": 894 + }, + { + "epoch": 0.44235759298158905, + "grad_norm": 1.3135451040729966, + "learning_rate": 1.8448091668603464e-05, + "loss": 0.29031360149383545, + "step": 895 + }, + { + "epoch": 0.4428518472754232, + "grad_norm": 1.2267380523250877, + "learning_rate": 1.844371702905317e-05, + "loss": 0.36141306161880493, + "step": 896 + }, + { + "epoch": 0.44334610156925736, + "grad_norm": 0.9926258795727512, + "learning_rate": 
1.8439336752598027e-05, + "loss": 0.35286253690719604, + "step": 897 + }, + { + "epoch": 0.44384035586309156, + "grad_norm": 1.0509214985554662, + "learning_rate": 1.8434950842162256e-05, + "loss": 0.38967087864875793, + "step": 898 + }, + { + "epoch": 0.4443346101569257, + "grad_norm": 1.1041873655686079, + "learning_rate": 1.8430559300673824e-05, + "loss": 0.4260423183441162, + "step": 899 + }, + { + "epoch": 0.4448288644507599, + "grad_norm": 1.0004221402171782, + "learning_rate": 1.8426162131064456e-05, + "loss": 0.35336780548095703, + "step": 900 + }, + { + "epoch": 0.44532311874459407, + "grad_norm": 1.0124996907215051, + "learning_rate": 1.842175933626965e-05, + "loss": 0.32953035831451416, + "step": 901 + }, + { + "epoch": 0.4458173730384283, + "grad_norm": 1.1481125848953921, + "learning_rate": 1.841735091922864e-05, + "loss": 0.3495085537433624, + "step": 902 + }, + { + "epoch": 0.44631162733226243, + "grad_norm": 1.0556558347257945, + "learning_rate": 1.8412936882884426e-05, + "loss": 0.3774382174015045, + "step": 903 + }, + { + "epoch": 0.44680588162609663, + "grad_norm": 1.1488659780400408, + "learning_rate": 1.8408517230183756e-05, + "loss": 0.397183358669281, + "step": 904 + }, + { + "epoch": 0.4473001359199308, + "grad_norm": 1.1226988100601583, + "learning_rate": 1.840409196407713e-05, + "loss": 0.4004632234573364, + "step": 905 + }, + { + "epoch": 0.447794390213765, + "grad_norm": 0.9888048683742604, + "learning_rate": 1.8399661087518784e-05, + "loss": 0.3464478850364685, + "step": 906 + }, + { + "epoch": 0.44828864450759914, + "grad_norm": 1.0618254470638813, + "learning_rate": 1.839522460346671e-05, + "loss": 0.38161879777908325, + "step": 907 + }, + { + "epoch": 0.44878289880143335, + "grad_norm": 1.0021571541379897, + "learning_rate": 1.839078251488265e-05, + "loss": 0.3307412266731262, + "step": 908 + }, + { + "epoch": 0.4492771530952675, + "grad_norm": 1.0558486391083746, + "learning_rate": 1.838633482473207e-05, + "loss": 
0.3238945007324219, + "step": 909 + }, + { + "epoch": 0.4497714073891017, + "grad_norm": 1.1763396472681338, + "learning_rate": 1.8381881535984186e-05, + "loss": 0.37863802909851074, + "step": 910 + }, + { + "epoch": 0.45026566168293586, + "grad_norm": 1.187536001798055, + "learning_rate": 1.8377422651611955e-05, + "loss": 0.35920199751853943, + "step": 911 + }, + { + "epoch": 0.45075991597677006, + "grad_norm": 1.1108046485108733, + "learning_rate": 1.8372958174592054e-05, + "loss": 0.3913283050060272, + "step": 912 + }, + { + "epoch": 0.4512541702706042, + "grad_norm": 1.029447767687351, + "learning_rate": 1.8368488107904916e-05, + "loss": 0.32950836420059204, + "step": 913 + }, + { + "epoch": 0.4517484245644384, + "grad_norm": 0.9275296283957708, + "learning_rate": 1.8364012454534687e-05, + "loss": 0.30557066202163696, + "step": 914 + }, + { + "epoch": 0.4522426788582726, + "grad_norm": 1.0685283966213752, + "learning_rate": 1.835953121746925e-05, + "loss": 0.3280435800552368, + "step": 915 + }, + { + "epoch": 0.4527369331521068, + "grad_norm": 1.0053118292301932, + "learning_rate": 1.835504439970021e-05, + "loss": 0.323611319065094, + "step": 916 + }, + { + "epoch": 0.45323118744594093, + "grad_norm": 1.086332749113099, + "learning_rate": 1.835055200422292e-05, + "loss": 0.3794775605201721, + "step": 917 + }, + { + "epoch": 0.45372544173977514, + "grad_norm": 1.1746257984153148, + "learning_rate": 1.8346054034036418e-05, + "loss": 0.3437816798686981, + "step": 918 + }, + { + "epoch": 0.4542196960336093, + "grad_norm": 1.175593282348777, + "learning_rate": 1.8341550492143497e-05, + "loss": 0.40312957763671875, + "step": 919 + }, + { + "epoch": 0.4547139503274435, + "grad_norm": 1.0344840643948632, + "learning_rate": 1.833704138155065e-05, + "loss": 0.33988016843795776, + "step": 920 + }, + { + "epoch": 0.45520820462127765, + "grad_norm": 1.099362227926189, + "learning_rate": 1.83325267052681e-05, + "loss": 0.30893969535827637, + "step": 921 + }, + { + "epoch": 
0.45570245891511185, + "grad_norm": 1.1279932203915406, + "learning_rate": 1.832800646630978e-05, + "loss": 0.3351095914840698, + "step": 922 + }, + { + "epoch": 0.456196713208946, + "grad_norm": 1.0211776718159757, + "learning_rate": 1.8323480667693335e-05, + "loss": 0.3235122561454773, + "step": 923 + }, + { + "epoch": 0.45669096750278015, + "grad_norm": 1.0274671423740642, + "learning_rate": 1.8318949312440126e-05, + "loss": 0.3482256531715393, + "step": 924 + }, + { + "epoch": 0.45718522179661436, + "grad_norm": 1.0223238909560575, + "learning_rate": 1.831441240357522e-05, + "loss": 0.3577580451965332, + "step": 925 + }, + { + "epoch": 0.4576794760904485, + "grad_norm": 1.100617534966992, + "learning_rate": 1.8309869944127386e-05, + "loss": 0.34081172943115234, + "step": 926 + }, + { + "epoch": 0.4581737303842827, + "grad_norm": 1.1911908757683491, + "learning_rate": 1.8305321937129118e-05, + "loss": 0.4041389524936676, + "step": 927 + }, + { + "epoch": 0.45866798467811687, + "grad_norm": 0.9300326755373893, + "learning_rate": 1.830076838561659e-05, + "loss": 0.3014240562915802, + "step": 928 + }, + { + "epoch": 0.4591622389719511, + "grad_norm": 1.0061666296037273, + "learning_rate": 1.829620929262969e-05, + "loss": 0.3105698823928833, + "step": 929 + }, + { + "epoch": 0.4596564932657852, + "grad_norm": 1.035696211609358, + "learning_rate": 1.8291644661212008e-05, + "loss": 0.36114832758903503, + "step": 930 + }, + { + "epoch": 0.46015074755961943, + "grad_norm": 1.0621844186259055, + "learning_rate": 1.828707449441082e-05, + "loss": 0.33738240599632263, + "step": 931 + }, + { + "epoch": 0.4606450018534536, + "grad_norm": 1.0507412286541111, + "learning_rate": 1.8282498795277108e-05, + "loss": 0.3455100655555725, + "step": 932 + }, + { + "epoch": 0.4611392561472878, + "grad_norm": 1.0635377650103532, + "learning_rate": 1.8277917566865544e-05, + "loss": 0.3622395992279053, + "step": 933 + }, + { + "epoch": 0.46163351044112194, + "grad_norm": 1.1698746861585616, 
+ "learning_rate": 1.8273330812234488e-05, + "loss": 0.36942192912101746, + "step": 934 + }, + { + "epoch": 0.46212776473495615, + "grad_norm": 1.1083328377879573, + "learning_rate": 1.8268738534445996e-05, + "loss": 0.33603039383888245, + "step": 935 + }, + { + "epoch": 0.4626220190287903, + "grad_norm": 1.0473328437100615, + "learning_rate": 1.82641407365658e-05, + "loss": 0.34806567430496216, + "step": 936 + }, + { + "epoch": 0.4631162733226245, + "grad_norm": 1.0559884618945852, + "learning_rate": 1.8259537421663333e-05, + "loss": 0.35512328147888184, + "step": 937 + }, + { + "epoch": 0.46361052761645866, + "grad_norm": 1.0108795008514326, + "learning_rate": 1.8254928592811695e-05, + "loss": 0.33349719643592834, + "step": 938 + }, + { + "epoch": 0.46410478191029286, + "grad_norm": 1.2122442261111321, + "learning_rate": 1.8250314253087677e-05, + "loss": 0.3510274887084961, + "step": 939 + }, + { + "epoch": 0.464599036204127, + "grad_norm": 1.2184941603930532, + "learning_rate": 1.824569440557175e-05, + "loss": 0.35831883549690247, + "step": 940 + }, + { + "epoch": 0.4650932904979612, + "grad_norm": 1.1635496425287044, + "learning_rate": 1.824106905334805e-05, + "loss": 0.353208065032959, + "step": 941 + }, + { + "epoch": 0.46558754479179537, + "grad_norm": 1.1400926219916139, + "learning_rate": 1.8236438199504402e-05, + "loss": 0.3335849642753601, + "step": 942 + }, + { + "epoch": 0.4660817990856296, + "grad_norm": 1.0623049779098108, + "learning_rate": 1.8231801847132294e-05, + "loss": 0.346247136592865, + "step": 943 + }, + { + "epoch": 0.46657605337946373, + "grad_norm": 1.0719060242361118, + "learning_rate": 1.8227159999326895e-05, + "loss": 0.35125380754470825, + "step": 944 + }, + { + "epoch": 0.46707030767329794, + "grad_norm": 1.026675887024196, + "learning_rate": 1.822251265918703e-05, + "loss": 0.34262675046920776, + "step": 945 + }, + { + "epoch": 0.4675645619671321, + "grad_norm": 1.0951735908349534, + "learning_rate": 1.82178598298152e-05, + "loss": 
0.3437168598175049, + "step": 946 + }, + { + "epoch": 0.4680588162609663, + "grad_norm": 1.2204880290084008, + "learning_rate": 1.8213201514317565e-05, + "loss": 0.35729774832725525, + "step": 947 + }, + { + "epoch": 0.46855307055480044, + "grad_norm": 1.1062871199303559, + "learning_rate": 1.8208537715803954e-05, + "loss": 0.36507898569107056, + "step": 948 + }, + { + "epoch": 0.46904732484863465, + "grad_norm": 1.0875432400928187, + "learning_rate": 1.8203868437387847e-05, + "loss": 0.363017737865448, + "step": 949 + }, + { + "epoch": 0.4695415791424688, + "grad_norm": 1.0718622311605446, + "learning_rate": 1.8199193682186388e-05, + "loss": 0.3645821511745453, + "step": 950 + }, + { + "epoch": 0.47003583343630295, + "grad_norm": 1.2195854283374437, + "learning_rate": 1.8194513453320387e-05, + "loss": 0.3054324686527252, + "step": 951 + }, + { + "epoch": 0.47053008773013716, + "grad_norm": 1.0538248118306075, + "learning_rate": 1.8189827753914282e-05, + "loss": 0.35003694891929626, + "step": 952 + }, + { + "epoch": 0.4710243420239713, + "grad_norm": 1.1789267282791076, + "learning_rate": 1.8185136587096193e-05, + "loss": 0.37834814190864563, + "step": 953 + }, + { + "epoch": 0.4715185963178055, + "grad_norm": 1.0741971770420784, + "learning_rate": 1.8180439955997867e-05, + "loss": 0.3369285464286804, + "step": 954 + }, + { + "epoch": 0.47201285061163967, + "grad_norm": 1.010532535770725, + "learning_rate": 1.8175737863754706e-05, + "loss": 0.3612895905971527, + "step": 955 + }, + { + "epoch": 0.4725071049054739, + "grad_norm": 1.057430538694607, + "learning_rate": 1.817103031350577e-05, + "loss": 0.34393271803855896, + "step": 956 + }, + { + "epoch": 0.473001359199308, + "grad_norm": 1.0983705860238564, + "learning_rate": 1.8166317308393745e-05, + "loss": 0.3824620544910431, + "step": 957 + }, + { + "epoch": 0.47349561349314223, + "grad_norm": 1.0093831974265368, + "learning_rate": 1.816159885156497e-05, + "loss": 0.3092145621776581, + "step": 958 + }, + { + 
"epoch": 0.4739898677869764, + "grad_norm": 0.9971938324913802, + "learning_rate": 1.8156874946169414e-05, + "loss": 0.3328183889389038, + "step": 959 + }, + { + "epoch": 0.4744841220808106, + "grad_norm": 1.1071894513842127, + "learning_rate": 1.815214559536069e-05, + "loss": 0.3715244233608246, + "step": 960 + }, + { + "epoch": 0.47497837637464474, + "grad_norm": 0.9615506144211561, + "learning_rate": 1.814741080229605e-05, + "loss": 0.31065690517425537, + "step": 961 + }, + { + "epoch": 0.47547263066847895, + "grad_norm": 1.0443475280559777, + "learning_rate": 1.814267057013637e-05, + "loss": 0.3632475733757019, + "step": 962 + }, + { + "epoch": 0.4759668849623131, + "grad_norm": 1.0447314581931118, + "learning_rate": 1.813792490204616e-05, + "loss": 0.3367992043495178, + "step": 963 + }, + { + "epoch": 0.4764611392561473, + "grad_norm": 3.0902704784337263, + "learning_rate": 1.813317380119356e-05, + "loss": 0.37678295373916626, + "step": 964 + }, + { + "epoch": 0.47695539354998145, + "grad_norm": 1.092515860835368, + "learning_rate": 1.8128417270750342e-05, + "loss": 0.31454166769981384, + "step": 965 + }, + { + "epoch": 0.47744964784381566, + "grad_norm": 1.1351912635055343, + "learning_rate": 1.81236553138919e-05, + "loss": 0.38495004177093506, + "step": 966 + }, + { + "epoch": 0.4779439021376498, + "grad_norm": 1.1935841314497264, + "learning_rate": 1.8118887933797237e-05, + "loss": 0.3867315948009491, + "step": 967 + }, + { + "epoch": 0.478438156431484, + "grad_norm": 1.0520609240642282, + "learning_rate": 1.8114115133648996e-05, + "loss": 0.3453156650066376, + "step": 968 + }, + { + "epoch": 0.47893241072531817, + "grad_norm": 1.0244115852831113, + "learning_rate": 1.8109336916633426e-05, + "loss": 0.34461456537246704, + "step": 969 + }, + { + "epoch": 0.4794266650191524, + "grad_norm": 1.0814329785787762, + "learning_rate": 1.8104553285940404e-05, + "loss": 0.36489856243133545, + "step": 970 + }, + { + "epoch": 0.4799209193129865, + "grad_norm": 
1.0551232871498393, + "learning_rate": 1.80997642447634e-05, + "loss": 0.3596840500831604, + "step": 971 + }, + { + "epoch": 0.48041517360682073, + "grad_norm": 1.1473167291229827, + "learning_rate": 1.8094969796299527e-05, + "loss": 0.3856956362724304, + "step": 972 + }, + { + "epoch": 0.4809094279006549, + "grad_norm": 1.036679746340059, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3235170245170593, + "step": 973 + }, + { + "epoch": 0.4814036821944891, + "grad_norm": 0.9980037023378185, + "learning_rate": 1.8085364690317564e-05, + "loss": 0.28033584356307983, + "step": 974 + }, + { + "epoch": 0.48189793648832324, + "grad_norm": 1.0350899218465197, + "learning_rate": 1.808055403921171e-05, + "loss": 0.3279935419559479, + "step": 975 + }, + { + "epoch": 0.4823921907821574, + "grad_norm": 1.1400322966724836, + "learning_rate": 1.8075737993643442e-05, + "loss": 0.36426058411598206, + "step": 976 + }, + { + "epoch": 0.4828864450759916, + "grad_norm": 1.062964412180167, + "learning_rate": 1.8070916556827876e-05, + "loss": 0.3720256984233856, + "step": 977 + }, + { + "epoch": 0.48338069936982575, + "grad_norm": 1.1101144076762623, + "learning_rate": 1.8066089731983735e-05, + "loss": 0.3299727439880371, + "step": 978 + }, + { + "epoch": 0.48387495366365996, + "grad_norm": 1.1080862284860111, + "learning_rate": 1.8061257522333338e-05, + "loss": 0.3425888419151306, + "step": 979 + }, + { + "epoch": 0.4843692079574941, + "grad_norm": 1.1899160965861721, + "learning_rate": 1.80564199311026e-05, + "loss": 0.34109392762184143, + "step": 980 + }, + { + "epoch": 0.4848634622513283, + "grad_norm": 1.017538963669655, + "learning_rate": 1.805157696152103e-05, + "loss": 0.29130926728248596, + "step": 981 + }, + { + "epoch": 0.48535771654516247, + "grad_norm": 1.1092378859222098, + "learning_rate": 1.8046728616821726e-05, + "loss": 0.36200815439224243, + "step": 982 + }, + { + "epoch": 0.48585197083899667, + "grad_norm": 1.3150178990962822, + "learning_rate": 
1.8041874900241368e-05, + "loss": 0.3343828320503235, + "step": 983 + }, + { + "epoch": 0.4863462251328308, + "grad_norm": 0.9882024578614582, + "learning_rate": 1.803701581502023e-05, + "loss": 0.32942160964012146, + "step": 984 + }, + { + "epoch": 0.48684047942666503, + "grad_norm": 0.9909863431121513, + "learning_rate": 1.803215136440217e-05, + "loss": 0.34390491247177124, + "step": 985 + }, + { + "epoch": 0.4873347337204992, + "grad_norm": 1.1118778887065912, + "learning_rate": 1.8027281551634622e-05, + "loss": 0.37723374366760254, + "step": 986 + }, + { + "epoch": 0.4878289880143334, + "grad_norm": 1.0469525424396737, + "learning_rate": 1.802240637996861e-05, + "loss": 0.3493693470954895, + "step": 987 + }, + { + "epoch": 0.48832324230816754, + "grad_norm": 1.131021341780466, + "learning_rate": 1.8017525852658723e-05, + "loss": 0.3564317524433136, + "step": 988 + }, + { + "epoch": 0.48881749660200174, + "grad_norm": 1.0435634175515676, + "learning_rate": 1.8012639972963136e-05, + "loss": 0.36572349071502686, + "step": 989 + }, + { + "epoch": 0.4893117508958359, + "grad_norm": 1.0078714155476896, + "learning_rate": 1.8007748744143586e-05, + "loss": 0.31457674503326416, + "step": 990 + }, + { + "epoch": 0.4898060051896701, + "grad_norm": 1.1126722971991523, + "learning_rate": 1.8002852169465393e-05, + "loss": 0.36191096901893616, + "step": 991 + }, + { + "epoch": 0.49030025948350425, + "grad_norm": 1.0321930748215848, + "learning_rate": 1.799795025219744e-05, + "loss": 0.33284491300582886, + "step": 992 + }, + { + "epoch": 0.49079451377733846, + "grad_norm": 1.0239281284644144, + "learning_rate": 1.7993042995612172e-05, + "loss": 0.3101437985897064, + "step": 993 + }, + { + "epoch": 0.4912887680711726, + "grad_norm": 1.033291904553078, + "learning_rate": 1.7988130402985608e-05, + "loss": 0.3196948170661926, + "step": 994 + }, + { + "epoch": 0.4917830223650068, + "grad_norm": 1.1489266069218314, + "learning_rate": 1.7983212477597325e-05, + "loss": 
0.3757585883140564, + "step": 995 + }, + { + "epoch": 0.49227727665884097, + "grad_norm": 1.1725728838471274, + "learning_rate": 1.7978289222730454e-05, + "loss": 0.3949659466743469, + "step": 996 + }, + { + "epoch": 0.4927715309526752, + "grad_norm": 1.1279800728609437, + "learning_rate": 1.79733606416717e-05, + "loss": 0.3490184545516968, + "step": 997 + }, + { + "epoch": 0.4932657852465093, + "grad_norm": 1.2158784468170585, + "learning_rate": 1.7968426737711304e-05, + "loss": 0.32302743196487427, + "step": 998 + }, + { + "epoch": 0.49376003954034353, + "grad_norm": 1.1923748239050125, + "learning_rate": 1.7963487514143073e-05, + "loss": 0.4205089807510376, + "step": 999 + }, + { + "epoch": 0.4942542938341777, + "grad_norm": 1.0997609009048648, + "learning_rate": 1.7958542974264363e-05, + "loss": 0.30787885189056396, + "step": 1000 + }, + { + "epoch": 0.4947485481280119, + "grad_norm": 0.9527130505595168, + "learning_rate": 1.7953593121376075e-05, + "loss": 0.3174916207790375, + "step": 1001 + }, + { + "epoch": 0.49524280242184604, + "grad_norm": 0.9736659707101099, + "learning_rate": 1.7948637958782662e-05, + "loss": 0.330039381980896, + "step": 1002 + }, + { + "epoch": 0.4957370567156802, + "grad_norm": 1.0487288206783625, + "learning_rate": 1.794367748979212e-05, + "loss": 0.3362613320350647, + "step": 1003 + }, + { + "epoch": 0.4962313110095144, + "grad_norm": 1.065682818958373, + "learning_rate": 1.793871171771599e-05, + "loss": 0.3479865789413452, + "step": 1004 + }, + { + "epoch": 0.49672556530334855, + "grad_norm": 1.0920057715386207, + "learning_rate": 1.7933740645869345e-05, + "loss": 0.361303448677063, + "step": 1005 + }, + { + "epoch": 0.49721981959718276, + "grad_norm": 1.07605927747069, + "learning_rate": 1.79287642775708e-05, + "loss": 0.32340794801712036, + "step": 1006 + }, + { + "epoch": 0.4977140738910169, + "grad_norm": 1.086462795838887, + "learning_rate": 1.792378261614252e-05, + "loss": 0.3410148620605469, + "step": 1007 + }, + { + 
"epoch": 0.4982083281848511, + "grad_norm": 1.0450045575623719, + "learning_rate": 1.791879566491018e-05, + "loss": 0.3332127034664154, + "step": 1008 + }, + { + "epoch": 0.49870258247868526, + "grad_norm": 1.1673390171795246, + "learning_rate": 1.7913803427202998e-05, + "loss": 0.36532774567604065, + "step": 1009 + }, + { + "epoch": 0.49919683677251947, + "grad_norm": 1.1838892890378474, + "learning_rate": 1.7908805906353725e-05, + "loss": 0.3721959888935089, + "step": 1010 + }, + { + "epoch": 0.4996910910663536, + "grad_norm": 0.990806411218012, + "learning_rate": 1.7903803105698627e-05, + "loss": 0.3406672477722168, + "step": 1011 + }, + { + "epoch": 0.5001853453601878, + "grad_norm": 1.0152890264941994, + "learning_rate": 1.789879502857751e-05, + "loss": 0.323926717042923, + "step": 1012 + }, + { + "epoch": 0.500679599654022, + "grad_norm": 1.082078334287421, + "learning_rate": 1.7893781678333694e-05, + "loss": 0.36245018243789673, + "step": 1013 + }, + { + "epoch": 0.5011738539478562, + "grad_norm": 1.1363612319173766, + "learning_rate": 1.7888763058314016e-05, + "loss": 0.36145877838134766, + "step": 1014 + }, + { + "epoch": 0.5016681082416904, + "grad_norm": 0.9479821815236287, + "learning_rate": 1.788373917186884e-05, + "loss": 0.31398001313209534, + "step": 1015 + }, + { + "epoch": 0.5021623625355245, + "grad_norm": 1.0634976007398544, + "learning_rate": 1.7878710022352033e-05, + "loss": 0.36732447147369385, + "step": 1016 + }, + { + "epoch": 0.5026566168293587, + "grad_norm": 1.0888289854290114, + "learning_rate": 1.787367561312099e-05, + "loss": 0.3336929678916931, + "step": 1017 + }, + { + "epoch": 0.5031508711231929, + "grad_norm": 1.081948070644993, + "learning_rate": 1.786863594753661e-05, + "loss": 0.33306068181991577, + "step": 1018 + }, + { + "epoch": 0.5036451254170271, + "grad_norm": 1.1710814753085148, + "learning_rate": 1.7863591028963297e-05, + "loss": 0.32577213644981384, + "step": 1019 + }, + { + "epoch": 0.5041393797108612, + "grad_norm": 
1.0902819718302648, + "learning_rate": 1.7858540860768974e-05, + "loss": 0.33542972803115845, + "step": 1020 + }, + { + "epoch": 0.5046336340046954, + "grad_norm": 1.1116685663765398, + "learning_rate": 1.7853485446325055e-05, + "loss": 0.3075249195098877, + "step": 1021 + }, + { + "epoch": 0.5051278882985296, + "grad_norm": 1.135601263046101, + "learning_rate": 1.7848424789006466e-05, + "loss": 0.3473510146141052, + "step": 1022 + }, + { + "epoch": 0.5056221425923638, + "grad_norm": 1.2152682076096186, + "learning_rate": 1.784335889219163e-05, + "loss": 0.3543929159641266, + "step": 1023 + }, + { + "epoch": 0.5061163968861979, + "grad_norm": 1.026549045591816, + "learning_rate": 1.783828775926246e-05, + "loss": 0.3198593556880951, + "step": 1024 + }, + { + "epoch": 0.5066106511800321, + "grad_norm": 1.07796975394457, + "learning_rate": 1.783321139360438e-05, + "loss": 0.34223973751068115, + "step": 1025 + }, + { + "epoch": 0.5071049054738663, + "grad_norm": 1.2487195797385122, + "learning_rate": 1.78281297986063e-05, + "loss": 0.3895387351512909, + "step": 1026 + }, + { + "epoch": 0.5075991597677005, + "grad_norm": 1.0333211037977794, + "learning_rate": 1.782304297766061e-05, + "loss": 0.35764580965042114, + "step": 1027 + }, + { + "epoch": 0.5080934140615346, + "grad_norm": 0.9679048017438919, + "learning_rate": 1.7817950934163213e-05, + "loss": 0.30859488248825073, + "step": 1028 + }, + { + "epoch": 0.5085876683553688, + "grad_norm": 1.0913185130679384, + "learning_rate": 1.7812853671513472e-05, + "loss": 0.3554389476776123, + "step": 1029 + }, + { + "epoch": 0.509081922649203, + "grad_norm": 1.0101463789736986, + "learning_rate": 1.7807751193114254e-05, + "loss": 0.3528766632080078, + "step": 1030 + }, + { + "epoch": 0.5095761769430371, + "grad_norm": 1.054067237260528, + "learning_rate": 1.78026435023719e-05, + "loss": 0.3645275831222534, + "step": 1031 + }, + { + "epoch": 0.5100704312368713, + "grad_norm": 1.338540047449502, + "learning_rate": 
1.779753060269623e-05, + "loss": 0.3137075901031494, + "step": 1032 + }, + { + "epoch": 0.5105646855307056, + "grad_norm": 1.0928434325752037, + "learning_rate": 1.7792412497500538e-05, + "loss": 0.31993091106414795, + "step": 1033 + }, + { + "epoch": 0.5110589398245398, + "grad_norm": 1.032718640643118, + "learning_rate": 1.7787289190201606e-05, + "loss": 0.3514295220375061, + "step": 1034 + }, + { + "epoch": 0.5115531941183739, + "grad_norm": 0.9529992201270954, + "learning_rate": 1.7782160684219677e-05, + "loss": 0.3167670667171478, + "step": 1035 + }, + { + "epoch": 0.5120474484122081, + "grad_norm": 1.1056391999630892, + "learning_rate": 1.7777026982978473e-05, + "loss": 0.3298097252845764, + "step": 1036 + }, + { + "epoch": 0.5125417027060423, + "grad_norm": 1.008539858185866, + "learning_rate": 1.777188808990517e-05, + "loss": 0.3334948420524597, + "step": 1037 + }, + { + "epoch": 0.5130359569998765, + "grad_norm": 1.1451382861648118, + "learning_rate": 1.776674400843043e-05, + "loss": 0.3705115020275116, + "step": 1038 + }, + { + "epoch": 0.5135302112937106, + "grad_norm": 1.2062150323771585, + "learning_rate": 1.7761594741988356e-05, + "loss": 0.3586978614330292, + "step": 1039 + }, + { + "epoch": 0.5140244655875448, + "grad_norm": 0.9949081741462515, + "learning_rate": 1.7756440294016535e-05, + "loss": 0.3105466663837433, + "step": 1040 + }, + { + "epoch": 0.514518719881379, + "grad_norm": 1.240576049327348, + "learning_rate": 1.7751280667956002e-05, + "loss": 0.35213470458984375, + "step": 1041 + }, + { + "epoch": 0.5150129741752132, + "grad_norm": 1.1494264660428748, + "learning_rate": 1.7746115867251245e-05, + "loss": 0.3830525875091553, + "step": 1042 + }, + { + "epoch": 0.5155072284690473, + "grad_norm": 1.044917786849415, + "learning_rate": 1.7740945895350215e-05, + "loss": 0.34106165170669556, + "step": 1043 + }, + { + "epoch": 0.5160014827628815, + "grad_norm": 0.9456529066854209, + "learning_rate": 1.773577075570431e-05, + "loss": 
0.33408549427986145, + "step": 1044 + }, + { + "epoch": 0.5164957370567157, + "grad_norm": 1.057634132461443, + "learning_rate": 1.7730590451768375e-05, + "loss": 0.32823115587234497, + "step": 1045 + }, + { + "epoch": 0.5169899913505499, + "grad_norm": 0.9870247990943719, + "learning_rate": 1.7725404987000716e-05, + "loss": 0.2866591811180115, + "step": 1046 + }, + { + "epoch": 0.517484245644384, + "grad_norm": 1.0669638645996897, + "learning_rate": 1.772021436486307e-05, + "loss": 0.34053099155426025, + "step": 1047 + }, + { + "epoch": 0.5179784999382182, + "grad_norm": 1.0384310943814752, + "learning_rate": 1.771501858882062e-05, + "loss": 0.30379486083984375, + "step": 1048 + }, + { + "epoch": 0.5184727542320524, + "grad_norm": 1.299899967945095, + "learning_rate": 1.7709817662341998e-05, + "loss": 0.37569302320480347, + "step": 1049 + }, + { + "epoch": 0.5189670085258866, + "grad_norm": 1.0489606422309163, + "learning_rate": 1.770461158889926e-05, + "loss": 0.31770390272140503, + "step": 1050 + }, + { + "epoch": 0.5194612628197207, + "grad_norm": 1.1640089464310481, + "learning_rate": 1.769940037196791e-05, + "loss": 0.34175002574920654, + "step": 1051 + }, + { + "epoch": 0.5199555171135549, + "grad_norm": 1.0797819699416114, + "learning_rate": 1.769418401502689e-05, + "loss": 0.3634580671787262, + "step": 1052 + }, + { + "epoch": 0.5204497714073891, + "grad_norm": 1.1990448584577926, + "learning_rate": 1.7688962521558554e-05, + "loss": 0.3631044030189514, + "step": 1053 + }, + { + "epoch": 0.5209440257012233, + "grad_norm": 1.2482048374766477, + "learning_rate": 1.7683735895048698e-05, + "loss": 0.3402160704135895, + "step": 1054 + }, + { + "epoch": 0.5214382799950574, + "grad_norm": 1.2190765212037056, + "learning_rate": 1.7678504138986548e-05, + "loss": 0.3895665407180786, + "step": 1055 + }, + { + "epoch": 0.5219325342888916, + "grad_norm": 1.076846194861831, + "learning_rate": 1.767326725686475e-05, + "loss": 0.32207030057907104, + "step": 1056 + }, + { + 
"epoch": 0.5224267885827258, + "grad_norm": 1.10282378456951, + "learning_rate": 1.7668025252179363e-05, + "loss": 0.33095866441726685, + "step": 1057 + }, + { + "epoch": 0.5229210428765599, + "grad_norm": 1.1487800022178571, + "learning_rate": 1.7662778128429883e-05, + "loss": 0.33239442110061646, + "step": 1058 + }, + { + "epoch": 0.5234152971703941, + "grad_norm": 0.9873637767970463, + "learning_rate": 1.7657525889119212e-05, + "loss": 0.27432021498680115, + "step": 1059 + }, + { + "epoch": 0.5239095514642284, + "grad_norm": 1.0928994862368866, + "learning_rate": 1.7652268537753672e-05, + "loss": 0.3221333622932434, + "step": 1060 + }, + { + "epoch": 0.5244038057580626, + "grad_norm": 1.114838100134283, + "learning_rate": 1.764700607784299e-05, + "loss": 0.3126341700553894, + "step": 1061 + }, + { + "epoch": 0.5248980600518967, + "grad_norm": 1.0401864286303986, + "learning_rate": 1.7641738512900315e-05, + "loss": 0.33239883184432983, + "step": 1062 + }, + { + "epoch": 0.5253923143457309, + "grad_norm": 0.9509614150111031, + "learning_rate": 1.7636465846442197e-05, + "loss": 0.30075010657310486, + "step": 1063 + }, + { + "epoch": 0.5258865686395651, + "grad_norm": 1.0717488761603333, + "learning_rate": 1.763118808198859e-05, + "loss": 0.3577713370323181, + "step": 1064 + }, + { + "epoch": 0.5263808229333993, + "grad_norm": 1.0802706273753335, + "learning_rate": 1.7625905223062858e-05, + "loss": 0.3483964204788208, + "step": 1065 + }, + { + "epoch": 0.5268750772272334, + "grad_norm": 1.1651963376515642, + "learning_rate": 1.762061727319176e-05, + "loss": 0.3622454106807709, + "step": 1066 + }, + { + "epoch": 0.5273693315210676, + "grad_norm": 1.0440643033385941, + "learning_rate": 1.761532423590545e-05, + "loss": 0.35156917572021484, + "step": 1067 + }, + { + "epoch": 0.5278635858149018, + "grad_norm": 1.1589394381083906, + "learning_rate": 1.7610026114737498e-05, + "loss": 0.3413820266723633, + "step": 1068 + }, + { + "epoch": 0.528357840108736, + "grad_norm": 
1.1280561588615983, + "learning_rate": 1.760472291322484e-05, + "loss": 0.3707934021949768, + "step": 1069 + }, + { + "epoch": 0.5288520944025701, + "grad_norm": 1.2170503232061094, + "learning_rate": 1.7599414634907828e-05, + "loss": 0.3472951054573059, + "step": 1070 + }, + { + "epoch": 0.5293463486964043, + "grad_norm": 1.1676650140216285, + "learning_rate": 1.7594101283330184e-05, + "loss": 0.393882155418396, + "step": 1071 + }, + { + "epoch": 0.5298406029902385, + "grad_norm": 0.9683606994511744, + "learning_rate": 1.758878286203903e-05, + "loss": 0.3094913065433502, + "step": 1072 + }, + { + "epoch": 0.5303348572840727, + "grad_norm": 1.09347684867524, + "learning_rate": 1.758345937458487e-05, + "loss": 0.33904048800468445, + "step": 1073 + }, + { + "epoch": 0.5308291115779068, + "grad_norm": 1.0218184375103434, + "learning_rate": 1.7578130824521585e-05, + "loss": 0.3218901753425598, + "step": 1074 + }, + { + "epoch": 0.531323365871741, + "grad_norm": 0.95615697696865, + "learning_rate": 1.7572797215406442e-05, + "loss": 0.31584852933883667, + "step": 1075 + }, + { + "epoch": 0.5318176201655752, + "grad_norm": 0.9682503945021611, + "learning_rate": 1.756745855080008e-05, + "loss": 0.3449877202510834, + "step": 1076 + }, + { + "epoch": 0.5323118744594094, + "grad_norm": 1.084607183777355, + "learning_rate": 1.756211483426651e-05, + "loss": 0.3544886112213135, + "step": 1077 + }, + { + "epoch": 0.5328061287532435, + "grad_norm": 1.1680618553038933, + "learning_rate": 1.755676606937313e-05, + "loss": 0.34360697865486145, + "step": 1078 + }, + { + "epoch": 0.5333003830470777, + "grad_norm": 1.0514045755368502, + "learning_rate": 1.7551412259690695e-05, + "loss": 0.3214710056781769, + "step": 1079 + }, + { + "epoch": 0.5337946373409119, + "grad_norm": 0.9951048830690797, + "learning_rate": 1.754605340879333e-05, + "loss": 0.33841896057128906, + "step": 1080 + }, + { + "epoch": 0.534288891634746, + "grad_norm": 1.0536673015942455, + "learning_rate": 
1.7540689520258532e-05, + "loss": 0.3134745657444, + "step": 1081 + }, + { + "epoch": 0.5347831459285802, + "grad_norm": 1.1773503335041235, + "learning_rate": 1.753532059766715e-05, + "loss": 0.3469204306602478, + "step": 1082 + }, + { + "epoch": 0.5352774002224144, + "grad_norm": 1.3802140663046265, + "learning_rate": 1.752994664460341e-05, + "loss": 0.39217621088027954, + "step": 1083 + }, + { + "epoch": 0.5357716545162486, + "grad_norm": 1.148906185686213, + "learning_rate": 1.7524567664654873e-05, + "loss": 0.34482622146606445, + "step": 1084 + }, + { + "epoch": 0.5362659088100827, + "grad_norm": 1.0089175831530743, + "learning_rate": 1.751918366141248e-05, + "loss": 0.308369517326355, + "step": 1085 + }, + { + "epoch": 0.5367601631039169, + "grad_norm": 1.1441511379564429, + "learning_rate": 1.751379463847051e-05, + "loss": 0.3396676480770111, + "step": 1086 + }, + { + "epoch": 0.5372544173977511, + "grad_norm": 1.0963418237920814, + "learning_rate": 1.7508400599426596e-05, + "loss": 0.3059370517730713, + "step": 1087 + }, + { + "epoch": 0.5377486716915854, + "grad_norm": 0.993693807257297, + "learning_rate": 1.7503001547881728e-05, + "loss": 0.31689077615737915, + "step": 1088 + }, + { + "epoch": 0.5382429259854195, + "grad_norm": 1.2996366258679217, + "learning_rate": 1.749759748744023e-05, + "loss": 0.37134337425231934, + "step": 1089 + }, + { + "epoch": 0.5387371802792537, + "grad_norm": 1.0586799377490923, + "learning_rate": 1.7492188421709775e-05, + "loss": 0.30404967069625854, + "step": 1090 + }, + { + "epoch": 0.5392314345730879, + "grad_norm": 1.1213884593031693, + "learning_rate": 1.7486774354301382e-05, + "loss": 0.34773269295692444, + "step": 1091 + }, + { + "epoch": 0.5397256888669221, + "grad_norm": 1.135256212480744, + "learning_rate": 1.7481355288829404e-05, + "loss": 0.34448760747909546, + "step": 1092 + }, + { + "epoch": 0.5402199431607562, + "grad_norm": 1.1111138178806874, + "learning_rate": 1.7475931228911526e-05, + "loss": 
0.33557915687561035, + "step": 1093 + }, + { + "epoch": 0.5407141974545904, + "grad_norm": 1.1277612406863344, + "learning_rate": 1.7470502178168783e-05, + "loss": 0.3216322362422943, + "step": 1094 + }, + { + "epoch": 0.5412084517484246, + "grad_norm": 1.1416777218141756, + "learning_rate": 1.7465068140225524e-05, + "loss": 0.3175346255302429, + "step": 1095 + }, + { + "epoch": 0.5417027060422588, + "grad_norm": 1.0466005920407673, + "learning_rate": 1.7459629118709435e-05, + "loss": 0.3150678277015686, + "step": 1096 + }, + { + "epoch": 0.5421969603360929, + "grad_norm": 1.1080261557130098, + "learning_rate": 1.7454185117251534e-05, + "loss": 0.3372325897216797, + "step": 1097 + }, + { + "epoch": 0.5426912146299271, + "grad_norm": 1.1607395393986693, + "learning_rate": 1.7448736139486156e-05, + "loss": 0.3460095524787903, + "step": 1098 + }, + { + "epoch": 0.5431854689237613, + "grad_norm": 1.0960477562857334, + "learning_rate": 1.7443282189050964e-05, + "loss": 0.3465900421142578, + "step": 1099 + }, + { + "epoch": 0.5436797232175955, + "grad_norm": 1.1271957826518202, + "learning_rate": 1.7437823269586925e-05, + "loss": 0.3707941174507141, + "step": 1100 + }, + { + "epoch": 0.5441739775114296, + "grad_norm": 1.0732325510644303, + "learning_rate": 1.7432359384738354e-05, + "loss": 0.3317713141441345, + "step": 1101 + }, + { + "epoch": 0.5446682318052638, + "grad_norm": 1.10075448775578, + "learning_rate": 1.742689053815285e-05, + "loss": 0.3391956090927124, + "step": 1102 + }, + { + "epoch": 0.545162486099098, + "grad_norm": 1.483156522178114, + "learning_rate": 1.742141673348134e-05, + "loss": 0.3838513195514679, + "step": 1103 + }, + { + "epoch": 0.5456567403929322, + "grad_norm": 1.2368776155357775, + "learning_rate": 1.7415937974378057e-05, + "loss": 0.4438849687576294, + "step": 1104 + }, + { + "epoch": 0.5461509946867663, + "grad_norm": 1.1360365035496875, + "learning_rate": 1.7410454264500542e-05, + "loss": 0.35329896211624146, + "step": 1105 + }, + { + 
"epoch": 0.5466452489806005, + "grad_norm": 0.9946710480219276, + "learning_rate": 1.7404965607509646e-05, + "loss": 0.3124481439590454, + "step": 1106 + }, + { + "epoch": 0.5471395032744347, + "grad_norm": 1.1827285369169889, + "learning_rate": 1.739947200706951e-05, + "loss": 0.3595995008945465, + "step": 1107 + }, + { + "epoch": 0.5476337575682688, + "grad_norm": 1.0771205850736374, + "learning_rate": 1.7393973466847592e-05, + "loss": 0.35914891958236694, + "step": 1108 + }, + { + "epoch": 0.548128011862103, + "grad_norm": 1.0372075645038734, + "learning_rate": 1.7388469990514636e-05, + "loss": 0.34034737944602966, + "step": 1109 + }, + { + "epoch": 0.5486222661559372, + "grad_norm": 0.9639792162761298, + "learning_rate": 1.7382961581744677e-05, + "loss": 0.3033643066883087, + "step": 1110 + }, + { + "epoch": 0.5491165204497714, + "grad_norm": 1.0333536833038373, + "learning_rate": 1.737744824421506e-05, + "loss": 0.3239862322807312, + "step": 1111 + }, + { + "epoch": 0.5496107747436055, + "grad_norm": 1.0992782883377998, + "learning_rate": 1.7371929981606403e-05, + "loss": 0.36473411321640015, + "step": 1112 + }, + { + "epoch": 0.5501050290374397, + "grad_norm": 0.9808971248907185, + "learning_rate": 1.7366406797602625e-05, + "loss": 0.3129761517047882, + "step": 1113 + }, + { + "epoch": 0.550599283331274, + "grad_norm": 1.0031500416462213, + "learning_rate": 1.736087869589092e-05, + "loss": 0.30224812030792236, + "step": 1114 + }, + { + "epoch": 0.5510935376251082, + "grad_norm": 1.0008522519559948, + "learning_rate": 1.7355345680161774e-05, + "loss": 0.30045247077941895, + "step": 1115 + }, + { + "epoch": 0.5515877919189422, + "grad_norm": 1.1079372723945795, + "learning_rate": 1.7349807754108944e-05, + "loss": 0.3356926739215851, + "step": 1116 + }, + { + "epoch": 0.5520820462127765, + "grad_norm": 1.3704982317685879, + "learning_rate": 1.7344264921429475e-05, + "loss": 0.37749868631362915, + "step": 1117 + }, + { + "epoch": 0.5525763005066107, + 
"grad_norm": 1.0400914273370205, + "learning_rate": 1.733871718582368e-05, + "loss": 0.331012099981308, + "step": 1118 + }, + { + "epoch": 0.5530705548004449, + "grad_norm": 1.2654046748606915, + "learning_rate": 1.7333164550995153e-05, + "loss": 0.3557187020778656, + "step": 1119 + }, + { + "epoch": 0.553564809094279, + "grad_norm": 1.151377810019934, + "learning_rate": 1.7327607020650744e-05, + "loss": 0.34102991223335266, + "step": 1120 + }, + { + "epoch": 0.5540590633881132, + "grad_norm": 1.0397881413898085, + "learning_rate": 1.7322044598500594e-05, + "loss": 0.328019917011261, + "step": 1121 + }, + { + "epoch": 0.5545533176819474, + "grad_norm": 1.0773058589187376, + "learning_rate": 1.7316477288258085e-05, + "loss": 0.33980751037597656, + "step": 1122 + }, + { + "epoch": 0.5550475719757816, + "grad_norm": 1.1823119583137516, + "learning_rate": 1.731090509363988e-05, + "loss": 0.3460109233856201, + "step": 1123 + }, + { + "epoch": 0.5555418262696157, + "grad_norm": 1.0727245460190564, + "learning_rate": 1.730532801836589e-05, + "loss": 0.3013002276420593, + "step": 1124 + }, + { + "epoch": 0.5560360805634499, + "grad_norm": 1.191952525403325, + "learning_rate": 1.72997460661593e-05, + "loss": 0.36195772886276245, + "step": 1125 + }, + { + "epoch": 0.5565303348572841, + "grad_norm": 1.1481571926267522, + "learning_rate": 1.7294159240746532e-05, + "loss": 0.3368675112724304, + "step": 1126 + }, + { + "epoch": 0.5570245891511183, + "grad_norm": 1.0950064938478345, + "learning_rate": 1.7288567545857283e-05, + "loss": 0.36618539690971375, + "step": 1127 + }, + { + "epoch": 0.5575188434449524, + "grad_norm": 1.0773610015009678, + "learning_rate": 1.7282970985224477e-05, + "loss": 0.3230215311050415, + "step": 1128 + }, + { + "epoch": 0.5580130977387866, + "grad_norm": 1.1539889538468413, + "learning_rate": 1.72773695625843e-05, + "loss": 0.38779711723327637, + "step": 1129 + }, + { + "epoch": 0.5585073520326208, + "grad_norm": 1.0853438524765577, + 
"learning_rate": 1.7271763281676187e-05, + "loss": 0.33910998702049255, + "step": 1130 + }, + { + "epoch": 0.559001606326455, + "grad_norm": 1.1265909455665821, + "learning_rate": 1.726615214624281e-05, + "loss": 0.3526651859283447, + "step": 1131 + }, + { + "epoch": 0.5594958606202891, + "grad_norm": 1.0899084132349224, + "learning_rate": 1.7260536160030077e-05, + "loss": 0.33794116973876953, + "step": 1132 + }, + { + "epoch": 0.5599901149141233, + "grad_norm": 1.2383181058563666, + "learning_rate": 1.7254915326787145e-05, + "loss": 0.3294123411178589, + "step": 1133 + }, + { + "epoch": 0.5604843692079575, + "grad_norm": 1.0381296685245769, + "learning_rate": 1.7249289650266402e-05, + "loss": 0.31193166971206665, + "step": 1134 + }, + { + "epoch": 0.5609786235017916, + "grad_norm": 1.0273514183990056, + "learning_rate": 1.7243659134223467e-05, + "loss": 0.298290491104126, + "step": 1135 + }, + { + "epoch": 0.5614728777956258, + "grad_norm": 1.0372406743131939, + "learning_rate": 1.7238023782417194e-05, + "loss": 0.3157176971435547, + "step": 1136 + }, + { + "epoch": 0.56196713208946, + "grad_norm": 0.9703670449018593, + "learning_rate": 1.7232383598609664e-05, + "loss": 0.3152535855770111, + "step": 1137 + }, + { + "epoch": 0.5624613863832942, + "grad_norm": 1.1457741905911056, + "learning_rate": 1.722673858656618e-05, + "loss": 0.35004952549934387, + "step": 1138 + }, + { + "epoch": 0.5629556406771283, + "grad_norm": 1.2128755723830003, + "learning_rate": 1.722108875005527e-05, + "loss": 0.3531174957752228, + "step": 1139 + }, + { + "epoch": 0.5634498949709625, + "grad_norm": 0.9896343114056704, + "learning_rate": 1.7215434092848693e-05, + "loss": 0.32532358169555664, + "step": 1140 + }, + { + "epoch": 0.5639441492647967, + "grad_norm": 1.086973420033045, + "learning_rate": 1.7209774618721408e-05, + "loss": 0.3252495229244232, + "step": 1141 + }, + { + "epoch": 0.564438403558631, + "grad_norm": 1.1232225314649664, + "learning_rate": 1.7204110331451603e-05, + 
"loss": 0.35428208112716675, + "step": 1142 + }, + { + "epoch": 0.564932657852465, + "grad_norm": 1.165276028587328, + "learning_rate": 1.7198441234820674e-05, + "loss": 0.37419646978378296, + "step": 1143 + }, + { + "epoch": 0.5654269121462993, + "grad_norm": 1.1206339776354848, + "learning_rate": 1.7192767332613235e-05, + "loss": 0.3342249095439911, + "step": 1144 + }, + { + "epoch": 0.5659211664401335, + "grad_norm": 1.0700889667237288, + "learning_rate": 1.7187088628617093e-05, + "loss": 0.36827898025512695, + "step": 1145 + }, + { + "epoch": 0.5664154207339677, + "grad_norm": 1.1884715403984119, + "learning_rate": 1.7181405126623275e-05, + "loss": 0.3560858964920044, + "step": 1146 + }, + { + "epoch": 0.5669096750278018, + "grad_norm": 1.0578073497156413, + "learning_rate": 1.7175716830426005e-05, + "loss": 0.35333797335624695, + "step": 1147 + }, + { + "epoch": 0.567403929321636, + "grad_norm": 1.0504095801617317, + "learning_rate": 1.71700237438227e-05, + "loss": 0.31053799390792847, + "step": 1148 + }, + { + "epoch": 0.5678981836154702, + "grad_norm": 1.1443484208273471, + "learning_rate": 1.7164325870613998e-05, + "loss": 0.37123826146125793, + "step": 1149 + }, + { + "epoch": 0.5683924379093044, + "grad_norm": 1.069054169156011, + "learning_rate": 1.715862321460371e-05, + "loss": 0.33981990814208984, + "step": 1150 + }, + { + "epoch": 0.5688866922031385, + "grad_norm": 1.1295222791710222, + "learning_rate": 1.7152915779598846e-05, + "loss": 0.34938257932662964, + "step": 1151 + }, + { + "epoch": 0.5693809464969727, + "grad_norm": 1.10704413276648, + "learning_rate": 1.714720356940961e-05, + "loss": 0.3069387376308441, + "step": 1152 + }, + { + "epoch": 0.5698752007908069, + "grad_norm": 1.1206304490989205, + "learning_rate": 1.7141486587849397e-05, + "loss": 0.34879156947135925, + "step": 1153 + }, + { + "epoch": 0.5703694550846411, + "grad_norm": 1.140159647567344, + "learning_rate": 1.7135764838734773e-05, + "loss": 0.3624545931816101, + "step": 1154 + 
}, + { + "epoch": 0.5708637093784752, + "grad_norm": 1.0671159168894162, + "learning_rate": 1.7130038325885502e-05, + "loss": 0.3548320531845093, + "step": 1155 + }, + { + "epoch": 0.5713579636723094, + "grad_norm": 1.0469806768045702, + "learning_rate": 1.7124307053124518e-05, + "loss": 0.3004404902458191, + "step": 1156 + }, + { + "epoch": 0.5718522179661436, + "grad_norm": 1.1058227077648823, + "learning_rate": 1.7118571024277943e-05, + "loss": 0.31545472145080566, + "step": 1157 + }, + { + "epoch": 0.5723464722599778, + "grad_norm": 1.100412587450837, + "learning_rate": 1.711283024317506e-05, + "loss": 0.3116477429866791, + "step": 1158 + }, + { + "epoch": 0.5728407265538119, + "grad_norm": 1.1169526030822408, + "learning_rate": 1.710708471364834e-05, + "loss": 0.3472268581390381, + "step": 1159 + }, + { + "epoch": 0.5733349808476461, + "grad_norm": 1.1641407854241053, + "learning_rate": 1.7101334439533414e-05, + "loss": 0.33334046602249146, + "step": 1160 + }, + { + "epoch": 0.5738292351414803, + "grad_norm": 1.1720238639752558, + "learning_rate": 1.7095579424669074e-05, + "loss": 0.3462664783000946, + "step": 1161 + }, + { + "epoch": 0.5743234894353144, + "grad_norm": 1.0854325044336006, + "learning_rate": 1.7089819672897304e-05, + "loss": 0.3241977393627167, + "step": 1162 + }, + { + "epoch": 0.5748177437291486, + "grad_norm": 1.2501733360326688, + "learning_rate": 1.7084055188063217e-05, + "loss": 0.3194134533405304, + "step": 1163 + }, + { + "epoch": 0.5753119980229828, + "grad_norm": 1.1336053472715226, + "learning_rate": 1.7078285974015103e-05, + "loss": 0.3644179701805115, + "step": 1164 + }, + { + "epoch": 0.575806252316817, + "grad_norm": 1.1434067682408584, + "learning_rate": 1.7072512034604412e-05, + "loss": 0.36653730273246765, + "step": 1165 + }, + { + "epoch": 0.5763005066106511, + "grad_norm": 1.1221051792069954, + "learning_rate": 1.706673337368574e-05, + "loss": 0.3435714840888977, + "step": 1166 + }, + { + "epoch": 0.5767947609044853, + 
"grad_norm": 1.0603782757024258, + "learning_rate": 1.706094999511684e-05, + "loss": 0.36935871839523315, + "step": 1167 + }, + { + "epoch": 0.5772890151983195, + "grad_norm": 0.9845968090919184, + "learning_rate": 1.7055161902758607e-05, + "loss": 0.29493796825408936, + "step": 1168 + }, + { + "epoch": 0.5777832694921538, + "grad_norm": 1.0115254154804856, + "learning_rate": 1.70493691004751e-05, + "loss": 0.32378828525543213, + "step": 1169 + }, + { + "epoch": 0.5782775237859878, + "grad_norm": 1.1123861652198228, + "learning_rate": 1.70435715921335e-05, + "loss": 0.3587600588798523, + "step": 1170 + }, + { + "epoch": 0.578771778079822, + "grad_norm": 1.1091481408248292, + "learning_rate": 1.703776938160415e-05, + "loss": 0.31885826587677, + "step": 1171 + }, + { + "epoch": 0.5792660323736563, + "grad_norm": 1.0414979222224348, + "learning_rate": 1.7031962472760514e-05, + "loss": 0.2950041890144348, + "step": 1172 + }, + { + "epoch": 0.5797602866674905, + "grad_norm": 1.121100234384589, + "learning_rate": 1.7026150869479208e-05, + "loss": 0.36190298199653625, + "step": 1173 + }, + { + "epoch": 0.5802545409613246, + "grad_norm": 1.067632760047313, + "learning_rate": 1.7020334575639972e-05, + "loss": 0.3402514159679413, + "step": 1174 + }, + { + "epoch": 0.5807487952551588, + "grad_norm": 0.9679286148168113, + "learning_rate": 1.7014513595125684e-05, + "loss": 0.3131282925605774, + "step": 1175 + }, + { + "epoch": 0.581243049548993, + "grad_norm": 1.056786860676952, + "learning_rate": 1.7008687931822344e-05, + "loss": 0.29499226808547974, + "step": 1176 + }, + { + "epoch": 0.5817373038428272, + "grad_norm": 1.0712930292635054, + "learning_rate": 1.700285758961908e-05, + "loss": 0.36821871995925903, + "step": 1177 + }, + { + "epoch": 0.5822315581366613, + "grad_norm": 1.2780126948070993, + "learning_rate": 1.6997022572408152e-05, + "loss": 0.31486836075782776, + "step": 1178 + }, + { + "epoch": 0.5827258124304955, + "grad_norm": 1.0778384840117066, + 
"learning_rate": 1.6991182884084928e-05, + "loss": 0.3176078498363495, + "step": 1179 + }, + { + "epoch": 0.5832200667243297, + "grad_norm": 1.294300282858588, + "learning_rate": 1.69853385285479e-05, + "loss": 0.4130980968475342, + "step": 1180 + }, + { + "epoch": 0.5837143210181639, + "grad_norm": 1.103648457674251, + "learning_rate": 1.697948950969868e-05, + "loss": 0.3164641857147217, + "step": 1181 + }, + { + "epoch": 0.584208575311998, + "grad_norm": 1.1707357674613739, + "learning_rate": 1.697363583144199e-05, + "loss": 0.36420726776123047, + "step": 1182 + }, + { + "epoch": 0.5847028296058322, + "grad_norm": 1.1827091905189109, + "learning_rate": 1.696777749768566e-05, + "loss": 0.3279833197593689, + "step": 1183 + }, + { + "epoch": 0.5851970838996664, + "grad_norm": 1.2462082843052198, + "learning_rate": 1.696191451234063e-05, + "loss": 0.311473548412323, + "step": 1184 + }, + { + "epoch": 0.5856913381935006, + "grad_norm": 1.0514702517271486, + "learning_rate": 1.6956046879320943e-05, + "loss": 0.32284629344940186, + "step": 1185 + }, + { + "epoch": 0.5861855924873347, + "grad_norm": 1.081683685343838, + "learning_rate": 1.6950174602543753e-05, + "loss": 0.3318635821342468, + "step": 1186 + }, + { + "epoch": 0.5866798467811689, + "grad_norm": 1.10655975155716, + "learning_rate": 1.6944297685929298e-05, + "loss": 0.3268307149410248, + "step": 1187 + }, + { + "epoch": 0.5871741010750031, + "grad_norm": 1.1757413336808826, + "learning_rate": 1.6938416133400934e-05, + "loss": 0.31885889172554016, + "step": 1188 + }, + { + "epoch": 0.5876683553688372, + "grad_norm": 1.044019985672413, + "learning_rate": 1.69325299488851e-05, + "loss": 0.29273971915245056, + "step": 1189 + }, + { + "epoch": 0.5881626096626714, + "grad_norm": 1.2128861059808687, + "learning_rate": 1.692663913631132e-05, + "loss": 0.3585188388824463, + "step": 1190 + }, + { + "epoch": 0.5886568639565056, + "grad_norm": 1.152183266519285, + "learning_rate": 1.6920743699612226e-05, + "loss": 
0.37145692110061646, + "step": 1191 + }, + { + "epoch": 0.5891511182503398, + "grad_norm": 1.1211663085079848, + "learning_rate": 1.691484364272352e-05, + "loss": 0.34805262088775635, + "step": 1192 + }, + { + "epoch": 0.5896453725441739, + "grad_norm": 1.1094913177494823, + "learning_rate": 1.6908938969584002e-05, + "loss": 0.3540152907371521, + "step": 1193 + }, + { + "epoch": 0.5901396268380081, + "grad_norm": 1.1138288622940957, + "learning_rate": 1.6903029684135545e-05, + "loss": 0.35808512568473816, + "step": 1194 + }, + { + "epoch": 0.5906338811318423, + "grad_norm": 1.2028693910668573, + "learning_rate": 1.68971157903231e-05, + "loss": 0.2881169021129608, + "step": 1195 + }, + { + "epoch": 0.5911281354256765, + "grad_norm": 1.126509020875868, + "learning_rate": 1.6891197292094704e-05, + "loss": 0.33551955223083496, + "step": 1196 + }, + { + "epoch": 0.5916223897195106, + "grad_norm": 1.0141998416691063, + "learning_rate": 1.688527419340146e-05, + "loss": 0.30721622705459595, + "step": 1197 + }, + { + "epoch": 0.5921166440133449, + "grad_norm": 1.0876501850612135, + "learning_rate": 1.687934649819754e-05, + "loss": 0.3296341300010681, + "step": 1198 + }, + { + "epoch": 0.5926108983071791, + "grad_norm": 1.1194456964334092, + "learning_rate": 1.6873414210440194e-05, + "loss": 0.3511606454849243, + "step": 1199 + }, + { + "epoch": 0.5931051526010133, + "grad_norm": 1.0762712673108126, + "learning_rate": 1.6867477334089728e-05, + "loss": 0.34293919801712036, + "step": 1200 + }, + { + "epoch": 0.5935994068948474, + "grad_norm": 0.9942852659141888, + "learning_rate": 1.686153587310952e-05, + "loss": 0.3334580659866333, + "step": 1201 + }, + { + "epoch": 0.5940936611886816, + "grad_norm": 1.1354238373080972, + "learning_rate": 1.6855589831466e-05, + "loss": 0.3542851209640503, + "step": 1202 + }, + { + "epoch": 0.5945879154825158, + "grad_norm": 1.0952906678959344, + "learning_rate": 1.6849639213128667e-05, + "loss": 0.30951520800590515, + "step": 1203 + }, + { + 
"epoch": 0.59508216977635, + "grad_norm": 1.0716710567299268, + "learning_rate": 1.6843684022070062e-05, + "loss": 0.333478718996048, + "step": 1204 + }, + { + "epoch": 0.5955764240701841, + "grad_norm": 1.0944556204789582, + "learning_rate": 1.683772426226579e-05, + "loss": 0.33562588691711426, + "step": 1205 + }, + { + "epoch": 0.5960706783640183, + "grad_norm": 0.9136596878493712, + "learning_rate": 1.6831759937694497e-05, + "loss": 0.2626678943634033, + "step": 1206 + }, + { + "epoch": 0.5965649326578525, + "grad_norm": 1.1138721974001247, + "learning_rate": 1.6825791052337884e-05, + "loss": 0.349543035030365, + "step": 1207 + }, + { + "epoch": 0.5970591869516867, + "grad_norm": 1.0760285856821303, + "learning_rate": 1.6819817610180696e-05, + "loss": 0.3229057788848877, + "step": 1208 + }, + { + "epoch": 0.5975534412455208, + "grad_norm": 1.0511960959262137, + "learning_rate": 1.681383961521071e-05, + "loss": 0.32023823261260986, + "step": 1209 + }, + { + "epoch": 0.598047695539355, + "grad_norm": 1.0122201188951288, + "learning_rate": 1.680785707141876e-05, + "loss": 0.31556791067123413, + "step": 1210 + }, + { + "epoch": 0.5985419498331892, + "grad_norm": 1.1858949236151264, + "learning_rate": 1.68018699827987e-05, + "loss": 0.33287158608436584, + "step": 1211 + }, + { + "epoch": 0.5990362041270234, + "grad_norm": 1.0276520854994282, + "learning_rate": 1.6795878353347427e-05, + "loss": 0.28690433502197266, + "step": 1212 + }, + { + "epoch": 0.5995304584208575, + "grad_norm": 1.1202382723881081, + "learning_rate": 1.6789882187064862e-05, + "loss": 0.3501484990119934, + "step": 1213 + }, + { + "epoch": 0.6000247127146917, + "grad_norm": 1.15016872261832, + "learning_rate": 1.678388148795397e-05, + "loss": 0.3645259439945221, + "step": 1214 + }, + { + "epoch": 0.6005189670085259, + "grad_norm": 1.0232559071014062, + "learning_rate": 1.6777876260020726e-05, + "loss": 0.3270183801651001, + "step": 1215 + }, + { + "epoch": 0.60101322130236, + "grad_norm": 
1.0680433488207848, + "learning_rate": 1.6771866507274132e-05, + "loss": 0.31767967343330383, + "step": 1216 + }, + { + "epoch": 0.6015074755961942, + "grad_norm": 1.0642272352631703, + "learning_rate": 1.6765852233726216e-05, + "loss": 0.3170120120048523, + "step": 1217 + }, + { + "epoch": 0.6020017298900284, + "grad_norm": 1.0689193394735252, + "learning_rate": 1.6759833443392022e-05, + "loss": 0.3270176351070404, + "step": 1218 + }, + { + "epoch": 0.6024959841838626, + "grad_norm": 1.0053062396233938, + "learning_rate": 1.6753810140289608e-05, + "loss": 0.3229079246520996, + "step": 1219 + }, + { + "epoch": 0.6029902384776967, + "grad_norm": 1.060220470914707, + "learning_rate": 1.6747782328440044e-05, + "loss": 0.3366449773311615, + "step": 1220 + }, + { + "epoch": 0.6034844927715309, + "grad_norm": 1.2656940979343048, + "learning_rate": 1.674175001186741e-05, + "loss": 0.4027010500431061, + "step": 1221 + }, + { + "epoch": 0.6039787470653651, + "grad_norm": 1.039989374871811, + "learning_rate": 1.6735713194598798e-05, + "loss": 0.31566083431243896, + "step": 1222 + }, + { + "epoch": 0.6044730013591993, + "grad_norm": 1.1667815915058346, + "learning_rate": 1.67296718806643e-05, + "loss": 0.3361780047416687, + "step": 1223 + }, + { + "epoch": 0.6049672556530334, + "grad_norm": 1.0628494144880791, + "learning_rate": 1.6723626074097007e-05, + "loss": 0.3197939693927765, + "step": 1224 + }, + { + "epoch": 0.6054615099468676, + "grad_norm": 1.078571350485402, + "learning_rate": 1.671757577893302e-05, + "loss": 0.32977360486984253, + "step": 1225 + }, + { + "epoch": 0.6059557642407019, + "grad_norm": 1.1192119082687915, + "learning_rate": 1.671152099921142e-05, + "loss": 0.3434401750564575, + "step": 1226 + }, + { + "epoch": 0.6064500185345361, + "grad_norm": 1.0664877094913836, + "learning_rate": 1.67054617389743e-05, + "loss": 0.33856305480003357, + "step": 1227 + }, + { + "epoch": 0.6069442728283702, + "grad_norm": 1.147959053573069, + "learning_rate": 
1.669939800226673e-05, + "loss": 0.31594911217689514, + "step": 1228 + }, + { + "epoch": 0.6074385271222044, + "grad_norm": 1.105417739927691, + "learning_rate": 1.669332979313678e-05, + "loss": 0.32347679138183594, + "step": 1229 + }, + { + "epoch": 0.6079327814160386, + "grad_norm": 1.1057400329817928, + "learning_rate": 1.6687257115635492e-05, + "loss": 0.32733607292175293, + "step": 1230 + }, + { + "epoch": 0.6084270357098728, + "grad_norm": 0.9869005136013326, + "learning_rate": 1.6681179973816908e-05, + "loss": 0.306827187538147, + "step": 1231 + }, + { + "epoch": 0.6089212900037069, + "grad_norm": 1.068802395839477, + "learning_rate": 1.667509837173803e-05, + "loss": 0.3515884280204773, + "step": 1232 + }, + { + "epoch": 0.6094155442975411, + "grad_norm": 1.0062662165973097, + "learning_rate": 1.6669012313458862e-05, + "loss": 0.28699082136154175, + "step": 1233 + }, + { + "epoch": 0.6099097985913753, + "grad_norm": 1.0697164166178312, + "learning_rate": 1.6662921803042356e-05, + "loss": 0.30737537145614624, + "step": 1234 + }, + { + "epoch": 0.6104040528852095, + "grad_norm": 1.0782793991023802, + "learning_rate": 1.665682684455446e-05, + "loss": 0.3193345069885254, + "step": 1235 + }, + { + "epoch": 0.6108983071790436, + "grad_norm": 1.1629258901733988, + "learning_rate": 1.6650727442064073e-05, + "loss": 0.3326336741447449, + "step": 1236 + }, + { + "epoch": 0.6113925614728778, + "grad_norm": 1.0950813589125916, + "learning_rate": 1.6644623599643076e-05, + "loss": 0.2967267632484436, + "step": 1237 + }, + { + "epoch": 0.611886815766712, + "grad_norm": 1.104366364956542, + "learning_rate": 1.66385153213663e-05, + "loss": 0.3163914084434509, + "step": 1238 + }, + { + "epoch": 0.6123810700605461, + "grad_norm": 1.1913476484695409, + "learning_rate": 1.663240261131155e-05, + "loss": 0.40281808376312256, + "step": 1239 + }, + { + "epoch": 0.6128753243543803, + "grad_norm": 1.1744917859448287, + "learning_rate": 1.6626285473559586e-05, + "loss": 
0.33946287631988525, + "step": 1240 + }, + { + "epoch": 0.6133695786482145, + "grad_norm": 1.121011060895708, + "learning_rate": 1.6620163912194114e-05, + "loss": 0.3750913143157959, + "step": 1241 + }, + { + "epoch": 0.6138638329420487, + "grad_norm": 1.1601773319994575, + "learning_rate": 1.6614037931301804e-05, + "loss": 0.32449400424957275, + "step": 1242 + }, + { + "epoch": 0.6143580872358828, + "grad_norm": 1.146035054497973, + "learning_rate": 1.6607907534972277e-05, + "loss": 0.3484799861907959, + "step": 1243 + }, + { + "epoch": 0.614852341529717, + "grad_norm": 1.0478699674323781, + "learning_rate": 1.6601772727298095e-05, + "loss": 0.2991127669811249, + "step": 1244 + }, + { + "epoch": 0.6153465958235512, + "grad_norm": 1.0941316253076903, + "learning_rate": 1.6595633512374768e-05, + "loss": 0.339094340801239, + "step": 1245 + }, + { + "epoch": 0.6158408501173854, + "grad_norm": 1.0756027047064132, + "learning_rate": 1.6589489894300744e-05, + "loss": 0.3147842288017273, + "step": 1246 + }, + { + "epoch": 0.6163351044112195, + "grad_norm": 1.0944450465347566, + "learning_rate": 1.6583341877177427e-05, + "loss": 0.3036183714866638, + "step": 1247 + }, + { + "epoch": 0.6168293587050537, + "grad_norm": 1.0983853525092009, + "learning_rate": 1.657718946510913e-05, + "loss": 0.32657095789909363, + "step": 1248 + }, + { + "epoch": 0.6173236129988879, + "grad_norm": 1.0660730573251251, + "learning_rate": 1.6571032662203126e-05, + "loss": 0.3104664385318756, + "step": 1249 + }, + { + "epoch": 0.6178178672927221, + "grad_norm": 1.0675015064613533, + "learning_rate": 1.6564871472569604e-05, + "loss": 0.30392807722091675, + "step": 1250 + }, + { + "epoch": 0.6183121215865562, + "grad_norm": 1.080894190005694, + "learning_rate": 1.655870590032169e-05, + "loss": 0.3087356388568878, + "step": 1251 + }, + { + "epoch": 0.6188063758803904, + "grad_norm": 1.0633256442775108, + "learning_rate": 1.6552535949575427e-05, + "loss": 0.3220480978488922, + "step": 1252 + }, + { + 
"epoch": 0.6193006301742247, + "grad_norm": 1.0867949301055795, + "learning_rate": 1.654636162444979e-05, + "loss": 0.33925485610961914, + "step": 1253 + }, + { + "epoch": 0.6197948844680589, + "grad_norm": 1.0651223448844926, + "learning_rate": 1.6540182929066667e-05, + "loss": 0.3704617917537689, + "step": 1254 + }, + { + "epoch": 0.620289138761893, + "grad_norm": 1.1158405395395257, + "learning_rate": 1.653399986755087e-05, + "loss": 0.33745670318603516, + "step": 1255 + }, + { + "epoch": 0.6207833930557272, + "grad_norm": 1.1397943957058634, + "learning_rate": 1.6527812444030118e-05, + "loss": 0.31651467084884644, + "step": 1256 + }, + { + "epoch": 0.6212776473495614, + "grad_norm": 1.141112365152985, + "learning_rate": 1.6521620662635053e-05, + "loss": 0.360455185174942, + "step": 1257 + }, + { + "epoch": 0.6217719016433956, + "grad_norm": 1.0000307812773819, + "learning_rate": 1.6515424527499214e-05, + "loss": 0.32819390296936035, + "step": 1258 + }, + { + "epoch": 0.6222661559372297, + "grad_norm": 1.229539015248975, + "learning_rate": 1.6509224042759053e-05, + "loss": 0.38759690523147583, + "step": 1259 + }, + { + "epoch": 0.6227604102310639, + "grad_norm": 1.127403937815861, + "learning_rate": 1.6503019212553932e-05, + "loss": 0.34250545501708984, + "step": 1260 + }, + { + "epoch": 0.6232546645248981, + "grad_norm": 1.0060644367410545, + "learning_rate": 1.6496810041026097e-05, + "loss": 0.3120163679122925, + "step": 1261 + }, + { + "epoch": 0.6237489188187323, + "grad_norm": 1.1050188267024101, + "learning_rate": 1.649059653232071e-05, + "loss": 0.35985836386680603, + "step": 1262 + }, + { + "epoch": 0.6242431731125664, + "grad_norm": 1.0877426950647728, + "learning_rate": 1.648437869058581e-05, + "loss": 0.3551288843154907, + "step": 1263 + }, + { + "epoch": 0.6247374274064006, + "grad_norm": 1.095568415742879, + "learning_rate": 1.6478156519972354e-05, + "loss": 0.33047816157341003, + "step": 1264 + }, + { + "epoch": 0.6252316817002348, + "grad_norm": 
1.0643242802432207, + "learning_rate": 1.6471930024634164e-05, + "loss": 0.32909417152404785, + "step": 1265 + }, + { + "epoch": 0.6257259359940689, + "grad_norm": 1.07195158812182, + "learning_rate": 1.6465699208727964e-05, + "loss": 0.3726924657821655, + "step": 1266 + }, + { + "epoch": 0.6262201902879031, + "grad_norm": 1.1316893144153, + "learning_rate": 1.6459464076413355e-05, + "loss": 0.3569204807281494, + "step": 1267 + }, + { + "epoch": 0.6267144445817373, + "grad_norm": 1.0125649890138406, + "learning_rate": 1.6453224631852825e-05, + "loss": 0.33798107504844666, + "step": 1268 + }, + { + "epoch": 0.6272086988755715, + "grad_norm": 1.1537944647220344, + "learning_rate": 1.644698087921173e-05, + "loss": 0.32891637086868286, + "step": 1269 + }, + { + "epoch": 0.6277029531694056, + "grad_norm": 1.1246833616649612, + "learning_rate": 1.644073282265832e-05, + "loss": 0.31512969732284546, + "step": 1270 + }, + { + "epoch": 0.6281972074632398, + "grad_norm": 1.1199823464164773, + "learning_rate": 1.643448046636371e-05, + "loss": 0.350041925907135, + "step": 1271 + }, + { + "epoch": 0.628691461757074, + "grad_norm": 1.0925989435954497, + "learning_rate": 1.642822381450187e-05, + "loss": 0.3248854875564575, + "step": 1272 + }, + { + "epoch": 0.6291857160509082, + "grad_norm": 1.0344569444697491, + "learning_rate": 1.6421962871249662e-05, + "loss": 0.3031661808490753, + "step": 1273 + }, + { + "epoch": 0.6296799703447423, + "grad_norm": 1.0843035546126185, + "learning_rate": 1.6415697640786802e-05, + "loss": 0.2903754711151123, + "step": 1274 + }, + { + "epoch": 0.6301742246385765, + "grad_norm": 1.0122518499053432, + "learning_rate": 1.6409428127295864e-05, + "loss": 0.300454318523407, + "step": 1275 + }, + { + "epoch": 0.6306684789324107, + "grad_norm": 1.0842968830814483, + "learning_rate": 1.6403154334962286e-05, + "loss": 0.3430244028568268, + "step": 1276 + }, + { + "epoch": 0.6311627332262449, + "grad_norm": 1.1383634793407482, + "learning_rate": 
1.6396876267974367e-05, + "loss": 0.3728436827659607, + "step": 1277 + }, + { + "epoch": 0.631656987520079, + "grad_norm": 1.103371729978927, + "learning_rate": 1.639059393052325e-05, + "loss": 0.3021183907985687, + "step": 1278 + }, + { + "epoch": 0.6321512418139132, + "grad_norm": 1.0649900935701406, + "learning_rate": 1.6384307326802934e-05, + "loss": 0.3313615918159485, + "step": 1279 + }, + { + "epoch": 0.6326454961077475, + "grad_norm": 1.0519110395000262, + "learning_rate": 1.637801646101027e-05, + "loss": 0.32833239436149597, + "step": 1280 + }, + { + "epoch": 0.6331397504015817, + "grad_norm": 1.1672616485147485, + "learning_rate": 1.6371721337344947e-05, + "loss": 0.3575769066810608, + "step": 1281 + }, + { + "epoch": 0.6336340046954158, + "grad_norm": 1.044512245658177, + "learning_rate": 1.6365421960009502e-05, + "loss": 0.33323729038238525, + "step": 1282 + }, + { + "epoch": 0.63412825898925, + "grad_norm": 1.150185694461945, + "learning_rate": 1.6359118333209307e-05, + "loss": 0.3522900938987732, + "step": 1283 + }, + { + "epoch": 0.6346225132830842, + "grad_norm": 1.2143932108960407, + "learning_rate": 1.635281046115257e-05, + "loss": 0.3350796699523926, + "step": 1284 + }, + { + "epoch": 0.6351167675769184, + "grad_norm": 1.2071815938700088, + "learning_rate": 1.6346498348050342e-05, + "loss": 0.350632905960083, + "step": 1285 + }, + { + "epoch": 0.6356110218707525, + "grad_norm": 1.0108749382306044, + "learning_rate": 1.6340181998116494e-05, + "loss": 0.2961253523826599, + "step": 1286 + }, + { + "epoch": 0.6361052761645867, + "grad_norm": 1.3686468141070485, + "learning_rate": 1.6333861415567736e-05, + "loss": 0.35736170411109924, + "step": 1287 + }, + { + "epoch": 0.6365995304584209, + "grad_norm": 1.1749750672779442, + "learning_rate": 1.63275366046236e-05, + "loss": 0.35654571652412415, + "step": 1288 + }, + { + "epoch": 0.6370937847522551, + "grad_norm": 1.0658003578898634, + "learning_rate": 1.6321207569506435e-05, + "loss": 
0.30518224835395813, + "step": 1289 + }, + { + "epoch": 0.6375880390460892, + "grad_norm": 1.1007851387105425, + "learning_rate": 1.6314874314441413e-05, + "loss": 0.35099470615386963, + "step": 1290 + }, + { + "epoch": 0.6380822933399234, + "grad_norm": 1.0971286067217327, + "learning_rate": 1.6308536843656528e-05, + "loss": 0.3577536344528198, + "step": 1291 + }, + { + "epoch": 0.6385765476337576, + "grad_norm": 1.0395121014513669, + "learning_rate": 1.6302195161382586e-05, + "loss": 0.3141167163848877, + "step": 1292 + }, + { + "epoch": 0.6390708019275917, + "grad_norm": 0.981608659730199, + "learning_rate": 1.62958492718532e-05, + "loss": 0.2920055389404297, + "step": 1293 + }, + { + "epoch": 0.6395650562214259, + "grad_norm": 1.0875768517352407, + "learning_rate": 1.6289499179304797e-05, + "loss": 0.32826486229896545, + "step": 1294 + }, + { + "epoch": 0.6400593105152601, + "grad_norm": 1.0051851075633542, + "learning_rate": 1.628314488797661e-05, + "loss": 0.3080480992794037, + "step": 1295 + }, + { + "epoch": 0.6405535648090943, + "grad_norm": 1.006537470660458, + "learning_rate": 1.627678640211067e-05, + "loss": 0.304529070854187, + "step": 1296 + }, + { + "epoch": 0.6410478191029284, + "grad_norm": 1.1108978139615113, + "learning_rate": 1.627042372595181e-05, + "loss": 0.34653496742248535, + "step": 1297 + }, + { + "epoch": 0.6415420733967626, + "grad_norm": 0.9745027779333038, + "learning_rate": 1.6264056863747667e-05, + "loss": 0.2938673496246338, + "step": 1298 + }, + { + "epoch": 0.6420363276905968, + "grad_norm": 1.1585281714148792, + "learning_rate": 1.625768581974866e-05, + "loss": 0.32350343465805054, + "step": 1299 + }, + { + "epoch": 0.642530581984431, + "grad_norm": 1.0756982630474194, + "learning_rate": 1.6251310598208015e-05, + "loss": 0.3175384998321533, + "step": 1300 + }, + { + "epoch": 0.6430248362782651, + "grad_norm": 1.1335110071944674, + "learning_rate": 1.6244931203381734e-05, + "loss": 0.32667648792266846, + "step": 1301 + }, + { + 
"epoch": 0.6435190905720993, + "grad_norm": 0.9986052180267636, + "learning_rate": 1.623854763952861e-05, + "loss": 0.30110976099967957, + "step": 1302 + }, + { + "epoch": 0.6440133448659335, + "grad_norm": 1.2219754266907614, + "learning_rate": 1.6232159910910224e-05, + "loss": 0.3508617579936981, + "step": 1303 + }, + { + "epoch": 0.6445075991597677, + "grad_norm": 1.1027211796126624, + "learning_rate": 1.622576802179092e-05, + "loss": 0.34416183829307556, + "step": 1304 + }, + { + "epoch": 0.6450018534536018, + "grad_norm": 1.1267200023483468, + "learning_rate": 1.6219371976437847e-05, + "loss": 0.3509306311607361, + "step": 1305 + }, + { + "epoch": 0.645496107747436, + "grad_norm": 1.1746524244290708, + "learning_rate": 1.6212971779120904e-05, + "loss": 0.36186683177948, + "step": 1306 + }, + { + "epoch": 0.6459903620412702, + "grad_norm": 1.128374133277422, + "learning_rate": 1.6206567434112776e-05, + "loss": 0.3123924732208252, + "step": 1307 + }, + { + "epoch": 0.6464846163351045, + "grad_norm": 1.2141772034453755, + "learning_rate": 1.6200158945688907e-05, + "loss": 0.3691411018371582, + "step": 1308 + }, + { + "epoch": 0.6469788706289386, + "grad_norm": 1.1011618758034853, + "learning_rate": 1.6193746318127516e-05, + "loss": 0.3136986792087555, + "step": 1309 + }, + { + "epoch": 0.6474731249227728, + "grad_norm": 1.0883839992045683, + "learning_rate": 1.6187329555709585e-05, + "loss": 0.30374211072921753, + "step": 1310 + }, + { + "epoch": 0.647967379216607, + "grad_norm": 1.207837369942263, + "learning_rate": 1.618090866271884e-05, + "loss": 0.3633323907852173, + "step": 1311 + }, + { + "epoch": 0.6484616335104412, + "grad_norm": 1.056749654034174, + "learning_rate": 1.6174483643441795e-05, + "loss": 0.31395208835601807, + "step": 1312 + }, + { + "epoch": 0.6489558878042753, + "grad_norm": 1.0312943002596973, + "learning_rate": 1.6168054502167687e-05, + "loss": 0.29258471727371216, + "step": 1313 + }, + { + "epoch": 0.6494501420981095, + "grad_norm": 
1.052844702612926, + "learning_rate": 1.6161621243188528e-05, + "loss": 0.3086007833480835, + "step": 1314 + }, + { + "epoch": 0.6499443963919437, + "grad_norm": 1.1099907156572013, + "learning_rate": 1.6155183870799063e-05, + "loss": 0.3604614734649658, + "step": 1315 + }, + { + "epoch": 0.6504386506857779, + "grad_norm": 1.230657559418624, + "learning_rate": 1.614874238929679e-05, + "loss": 0.3784678876399994, + "step": 1316 + }, + { + "epoch": 0.650932904979612, + "grad_norm": 0.9692609071600233, + "learning_rate": 1.6142296802981957e-05, + "loss": 0.29009610414505005, + "step": 1317 + }, + { + "epoch": 0.6514271592734462, + "grad_norm": 1.1385261282180998, + "learning_rate": 1.6135847116157542e-05, + "loss": 0.3667104244232178, + "step": 1318 + }, + { + "epoch": 0.6519214135672804, + "grad_norm": 1.0454111919656257, + "learning_rate": 1.6129393333129262e-05, + "loss": 0.3100985884666443, + "step": 1319 + }, + { + "epoch": 0.6524156678611145, + "grad_norm": 1.0967001531345488, + "learning_rate": 1.612293545820557e-05, + "loss": 0.34128522872924805, + "step": 1320 + }, + { + "epoch": 0.6529099221549487, + "grad_norm": 1.016572733864691, + "learning_rate": 1.611647349569765e-05, + "loss": 0.3017216920852661, + "step": 1321 + }, + { + "epoch": 0.6534041764487829, + "grad_norm": 1.0979244854260226, + "learning_rate": 1.611000744991942e-05, + "loss": 0.35060590505599976, + "step": 1322 + }, + { + "epoch": 0.6538984307426171, + "grad_norm": 1.180855026456707, + "learning_rate": 1.610353732518752e-05, + "loss": 0.3766549825668335, + "step": 1323 + }, + { + "epoch": 0.6543926850364512, + "grad_norm": 0.9954937284294141, + "learning_rate": 1.609706312582131e-05, + "loss": 0.2970678210258484, + "step": 1324 + }, + { + "epoch": 0.6548869393302854, + "grad_norm": 1.2407304893003468, + "learning_rate": 1.609058485614287e-05, + "loss": 0.3345789909362793, + "step": 1325 + }, + { + "epoch": 0.6553811936241196, + "grad_norm": 1.159801774337048, + "learning_rate": 
1.608410252047701e-05, + "loss": 0.34838157892227173, + "step": 1326 + }, + { + "epoch": 0.6558754479179538, + "grad_norm": 1.052743453114199, + "learning_rate": 1.6077616123151232e-05, + "loss": 0.27454087138175964, + "step": 1327 + }, + { + "epoch": 0.6563697022117879, + "grad_norm": 1.1304513457691607, + "learning_rate": 1.607112566849577e-05, + "loss": 0.3372647762298584, + "step": 1328 + }, + { + "epoch": 0.6568639565056221, + "grad_norm": 1.1678098502989476, + "learning_rate": 1.606463116084356e-05, + "loss": 0.34433993697166443, + "step": 1329 + }, + { + "epoch": 0.6573582107994563, + "grad_norm": 1.0760327464429003, + "learning_rate": 1.6058132604530242e-05, + "loss": 0.3267759382724762, + "step": 1330 + }, + { + "epoch": 0.6578524650932905, + "grad_norm": 1.044029067228307, + "learning_rate": 1.6051630003894155e-05, + "loss": 0.3022347390651703, + "step": 1331 + }, + { + "epoch": 0.6583467193871246, + "grad_norm": 1.0701124312590375, + "learning_rate": 1.604512336327634e-05, + "loss": 0.32478266954421997, + "step": 1332 + }, + { + "epoch": 0.6588409736809588, + "grad_norm": 1.1194211733981758, + "learning_rate": 1.6038612687020548e-05, + "loss": 0.32039204239845276, + "step": 1333 + }, + { + "epoch": 0.659335227974793, + "grad_norm": 1.189072572166891, + "learning_rate": 1.6032097979473203e-05, + "loss": 0.3376410901546478, + "step": 1334 + }, + { + "epoch": 0.6598294822686273, + "grad_norm": 1.0209465387535948, + "learning_rate": 1.6025579244983443e-05, + "loss": 0.28432029485702515, + "step": 1335 + }, + { + "epoch": 0.6603237365624613, + "grad_norm": 1.1101085579973957, + "learning_rate": 1.6019056487903067e-05, + "loss": 0.3349001109600067, + "step": 1336 + }, + { + "epoch": 0.6608179908562956, + "grad_norm": 1.016991018325495, + "learning_rate": 1.601252971258658e-05, + "loss": 0.27995598316192627, + "step": 1337 + }, + { + "epoch": 0.6613122451501298, + "grad_norm": 1.0652875110729838, + "learning_rate": 1.6005998923391172e-05, + "loss": 
0.28326892852783203, + "step": 1338 + }, + { + "epoch": 0.661806499443964, + "grad_norm": 1.1089400050162956, + "learning_rate": 1.5999464124676697e-05, + "loss": 0.3139200806617737, + "step": 1339 + }, + { + "epoch": 0.6623007537377981, + "grad_norm": 1.0857703956199403, + "learning_rate": 1.5992925320805688e-05, + "loss": 0.32395505905151367, + "step": 1340 + }, + { + "epoch": 0.6627950080316323, + "grad_norm": 1.187400707476865, + "learning_rate": 1.598638251614337e-05, + "loss": 0.35880255699157715, + "step": 1341 + }, + { + "epoch": 0.6632892623254665, + "grad_norm": 1.1264632686384342, + "learning_rate": 1.5979835715057616e-05, + "loss": 0.3696775436401367, + "step": 1342 + }, + { + "epoch": 0.6637835166193007, + "grad_norm": 1.2084738763641774, + "learning_rate": 1.597328492191898e-05, + "loss": 0.38413193821907043, + "step": 1343 + }, + { + "epoch": 0.6642777709131348, + "grad_norm": 2.0572947223290017, + "learning_rate": 1.596673014110068e-05, + "loss": 0.3564830720424652, + "step": 1344 + }, + { + "epoch": 0.664772025206969, + "grad_norm": 1.0170026931569898, + "learning_rate": 1.5960171376978587e-05, + "loss": 0.30634552240371704, + "step": 1345 + }, + { + "epoch": 0.6652662795008032, + "grad_norm": 1.0375692111937291, + "learning_rate": 1.595360863393125e-05, + "loss": 0.27113068103790283, + "step": 1346 + }, + { + "epoch": 0.6657605337946373, + "grad_norm": 1.242773829739391, + "learning_rate": 1.594704191633985e-05, + "loss": 0.34015512466430664, + "step": 1347 + }, + { + "epoch": 0.6662547880884715, + "grad_norm": 0.9724222230737607, + "learning_rate": 1.594047122858824e-05, + "loss": 0.2509229779243469, + "step": 1348 + }, + { + "epoch": 0.6667490423823057, + "grad_norm": 1.0705371704599513, + "learning_rate": 1.5933896575062922e-05, + "loss": 0.35122111439704895, + "step": 1349 + }, + { + "epoch": 0.6672432966761399, + "grad_norm": 1.0469402955634624, + "learning_rate": 1.592731796015303e-05, + "loss": 0.3656314015388489, + "step": 1350 + }, + { + 
"epoch": 0.667737550969974, + "grad_norm": 1.0980190562444532, + "learning_rate": 1.5920735388250363e-05, + "loss": 0.3482551574707031, + "step": 1351 + }, + { + "epoch": 0.6682318052638082, + "grad_norm": 0.9987728958846398, + "learning_rate": 1.5914148863749344e-05, + "loss": 0.2852175831794739, + "step": 1352 + }, + { + "epoch": 0.6687260595576424, + "grad_norm": 1.1231968462948256, + "learning_rate": 1.590755839104705e-05, + "loss": 0.3435940742492676, + "step": 1353 + }, + { + "epoch": 0.6692203138514766, + "grad_norm": 1.2334019463480403, + "learning_rate": 1.590096397454318e-05, + "loss": 0.34816527366638184, + "step": 1354 + }, + { + "epoch": 0.6697145681453107, + "grad_norm": 1.4472355399081582, + "learning_rate": 1.5894365618640077e-05, + "loss": 0.3283170461654663, + "step": 1355 + }, + { + "epoch": 0.6702088224391449, + "grad_norm": 1.1520168978191874, + "learning_rate": 1.588776332774271e-05, + "loss": 0.335905522108078, + "step": 1356 + }, + { + "epoch": 0.6707030767329791, + "grad_norm": 1.1244736910598108, + "learning_rate": 1.5881157106258666e-05, + "loss": 0.3055316209793091, + "step": 1357 + }, + { + "epoch": 0.6711973310268133, + "grad_norm": 1.050666765324263, + "learning_rate": 1.5874546958598172e-05, + "loss": 0.2873142659664154, + "step": 1358 + }, + { + "epoch": 0.6716915853206474, + "grad_norm": 1.0218331884680711, + "learning_rate": 1.586793288917406e-05, + "loss": 0.29659712314605713, + "step": 1359 + }, + { + "epoch": 0.6721858396144816, + "grad_norm": 1.0827802259474617, + "learning_rate": 1.5861314902401802e-05, + "loss": 0.33081990480422974, + "step": 1360 + }, + { + "epoch": 0.6726800939083158, + "grad_norm": 1.2140107638410536, + "learning_rate": 1.5854693002699457e-05, + "loss": 0.3559015691280365, + "step": 1361 + }, + { + "epoch": 0.67317434820215, + "grad_norm": 1.1424828520826207, + "learning_rate": 1.584806719448772e-05, + "loss": 0.3353438973426819, + "step": 1362 + }, + { + "epoch": 0.6736686024959841, + "grad_norm": 
1.0533009951881467, + "learning_rate": 1.5841437482189882e-05, + "loss": 0.3320685923099518, + "step": 1363 + }, + { + "epoch": 0.6741628567898184, + "grad_norm": 1.0600254033440624, + "learning_rate": 1.5834803870231846e-05, + "loss": 0.3070179224014282, + "step": 1364 + }, + { + "epoch": 0.6746571110836526, + "grad_norm": 1.0452219544938475, + "learning_rate": 1.5828166363042115e-05, + "loss": 0.28779780864715576, + "step": 1365 + }, + { + "epoch": 0.6751513653774868, + "grad_norm": 0.9932658974656241, + "learning_rate": 1.5821524965051793e-05, + "loss": 0.2793114185333252, + "step": 1366 + }, + { + "epoch": 0.6756456196713209, + "grad_norm": 1.117744874079583, + "learning_rate": 1.5814879680694585e-05, + "loss": 0.3586357831954956, + "step": 1367 + }, + { + "epoch": 0.6761398739651551, + "grad_norm": 1.122494918770383, + "learning_rate": 1.5808230514406786e-05, + "loss": 0.35258832573890686, + "step": 1368 + }, + { + "epoch": 0.6766341282589893, + "grad_norm": 1.0624893424167818, + "learning_rate": 1.5801577470627286e-05, + "loss": 0.2783607840538025, + "step": 1369 + }, + { + "epoch": 0.6771283825528235, + "grad_norm": 1.217710803865883, + "learning_rate": 1.579492055379756e-05, + "loss": 0.3494858741760254, + "step": 1370 + }, + { + "epoch": 0.6776226368466576, + "grad_norm": 1.1913846811426898, + "learning_rate": 1.578825976836167e-05, + "loss": 0.34512561559677124, + "step": 1371 + }, + { + "epoch": 0.6781168911404918, + "grad_norm": 1.0303182849177774, + "learning_rate": 1.5781595118766265e-05, + "loss": 0.2923341989517212, + "step": 1372 + }, + { + "epoch": 0.678611145434326, + "grad_norm": 1.0423481220482165, + "learning_rate": 1.5774926609460566e-05, + "loss": 0.3078833818435669, + "step": 1373 + }, + { + "epoch": 0.6791053997281601, + "grad_norm": 1.0871141007271816, + "learning_rate": 1.576825424489638e-05, + "loss": 0.3147008419036865, + "step": 1374 + }, + { + "epoch": 0.6795996540219943, + "grad_norm": 1.0340836184197277, + "learning_rate": 
1.576157802952807e-05, + "loss": 0.2907789349555969, + "step": 1375 + }, + { + "epoch": 0.6800939083158285, + "grad_norm": 1.1801114991913197, + "learning_rate": 1.57548979678126e-05, + "loss": 0.2941555976867676, + "step": 1376 + }, + { + "epoch": 0.6805881626096627, + "grad_norm": 1.137398706652914, + "learning_rate": 1.5748214064209473e-05, + "loss": 0.3452342748641968, + "step": 1377 + }, + { + "epoch": 0.6810824169034968, + "grad_norm": 0.9870368606552603, + "learning_rate": 1.5741526323180765e-05, + "loss": 0.31481361389160156, + "step": 1378 + }, + { + "epoch": 0.681576671197331, + "grad_norm": 1.1734004344416635, + "learning_rate": 1.573483474919112e-05, + "loss": 0.3403349220752716, + "step": 1379 + }, + { + "epoch": 0.6820709254911652, + "grad_norm": 1.3661262290783491, + "learning_rate": 1.572813934670774e-05, + "loss": 0.3283364176750183, + "step": 1380 + }, + { + "epoch": 0.6825651797849994, + "grad_norm": 1.0790334315781973, + "learning_rate": 1.5721440120200376e-05, + "loss": 0.3294883966445923, + "step": 1381 + }, + { + "epoch": 0.6830594340788335, + "grad_norm": 1.057215667272423, + "learning_rate": 1.5714737074141338e-05, + "loss": 0.3087981343269348, + "step": 1382 + }, + { + "epoch": 0.6835536883726677, + "grad_norm": 0.9953380542206125, + "learning_rate": 1.570803021300548e-05, + "loss": 0.29511693120002747, + "step": 1383 + }, + { + "epoch": 0.6840479426665019, + "grad_norm": 1.1147415286539601, + "learning_rate": 1.570131954127021e-05, + "loss": 0.3620823323726654, + "step": 1384 + }, + { + "epoch": 0.6845421969603361, + "grad_norm": 1.2518358127130127, + "learning_rate": 1.5694605063415477e-05, + "loss": 0.3978300988674164, + "step": 1385 + }, + { + "epoch": 0.6850364512541702, + "grad_norm": 1.2104388988265296, + "learning_rate": 1.5687886783923773e-05, + "loss": 0.35367661714553833, + "step": 1386 + }, + { + "epoch": 0.6855307055480044, + "grad_norm": 1.158470270474232, + "learning_rate": 1.5681164707280117e-05, + "loss": 
0.3313448131084442, + "step": 1387 + }, + { + "epoch": 0.6860249598418386, + "grad_norm": 1.1312206183637163, + "learning_rate": 1.5674438837972077e-05, + "loss": 0.34115713834762573, + "step": 1388 + }, + { + "epoch": 0.6865192141356729, + "grad_norm": 1.071906380475402, + "learning_rate": 1.566770918048975e-05, + "loss": 0.311326265335083, + "step": 1389 + }, + { + "epoch": 0.687013468429507, + "grad_norm": 1.0496646406815568, + "learning_rate": 1.5660975739325755e-05, + "loss": 0.32622700929641724, + "step": 1390 + }, + { + "epoch": 0.6875077227233412, + "grad_norm": 1.1530479303397307, + "learning_rate": 1.565423851897524e-05, + "loss": 0.36029747128486633, + "step": 1391 + }, + { + "epoch": 0.6880019770171754, + "grad_norm": 0.9691306195768644, + "learning_rate": 1.5647497523935883e-05, + "loss": 0.2771177291870117, + "step": 1392 + }, + { + "epoch": 0.6884962313110096, + "grad_norm": 1.1450942478438548, + "learning_rate": 1.5640752758707868e-05, + "loss": 0.3474002182483673, + "step": 1393 + }, + { + "epoch": 0.6889904856048437, + "grad_norm": 1.09850595363495, + "learning_rate": 1.563400422779391e-05, + "loss": 0.28006255626678467, + "step": 1394 + }, + { + "epoch": 0.6894847398986779, + "grad_norm": 1.0953635794573913, + "learning_rate": 1.562725193569923e-05, + "loss": 0.32151490449905396, + "step": 1395 + }, + { + "epoch": 0.6899789941925121, + "grad_norm": 1.1995785901348681, + "learning_rate": 1.5620495886931557e-05, + "loss": 0.3081187903881073, + "step": 1396 + }, + { + "epoch": 0.6904732484863463, + "grad_norm": 1.1390576796125735, + "learning_rate": 1.561373608600114e-05, + "loss": 0.3158992826938629, + "step": 1397 + }, + { + "epoch": 0.6909675027801804, + "grad_norm": 1.1783652693752096, + "learning_rate": 1.5606972537420723e-05, + "loss": 0.33790335059165955, + "step": 1398 + }, + { + "epoch": 0.6914617570740146, + "grad_norm": 1.1733705340509706, + "learning_rate": 1.5600205245705553e-05, + "loss": 0.3157292902469635, + "step": 1399 + }, + { + 
"epoch": 0.6919560113678488, + "grad_norm": 1.1674234642263648, + "learning_rate": 1.559343421537338e-05, + "loss": 0.31090572476387024, + "step": 1400 + }, + { + "epoch": 0.6924502656616829, + "grad_norm": 1.1604041250760992, + "learning_rate": 1.5586659450944443e-05, + "loss": 0.30499958992004395, + "step": 1401 + }, + { + "epoch": 0.6929445199555171, + "grad_norm": 1.0713722972416724, + "learning_rate": 1.5579880956941478e-05, + "loss": 0.3036794662475586, + "step": 1402 + }, + { + "epoch": 0.6934387742493513, + "grad_norm": 1.1543376848490539, + "learning_rate": 1.5573098737889716e-05, + "loss": 0.26514700055122375, + "step": 1403 + }, + { + "epoch": 0.6939330285431855, + "grad_norm": 1.0755683699565965, + "learning_rate": 1.5566312798316867e-05, + "loss": 0.31947457790374756, + "step": 1404 + }, + { + "epoch": 0.6944272828370196, + "grad_norm": 1.1317886658483896, + "learning_rate": 1.5559523142753124e-05, + "loss": 0.29387322068214417, + "step": 1405 + }, + { + "epoch": 0.6949215371308538, + "grad_norm": 1.117372828260635, + "learning_rate": 1.555272977573117e-05, + "loss": 0.33459946513175964, + "step": 1406 + }, + { + "epoch": 0.695415791424688, + "grad_norm": 1.2196871082649428, + "learning_rate": 1.5545932701786154e-05, + "loss": 0.31394320726394653, + "step": 1407 + }, + { + "epoch": 0.6959100457185222, + "grad_norm": 1.0669033993360486, + "learning_rate": 1.5539131925455713e-05, + "loss": 0.2891885042190552, + "step": 1408 + }, + { + "epoch": 0.6964043000123563, + "grad_norm": 1.2475463319045528, + "learning_rate": 1.5532327451279938e-05, + "loss": 0.33686599135398865, + "step": 1409 + }, + { + "epoch": 0.6968985543061905, + "grad_norm": 1.0648029492831064, + "learning_rate": 1.5525519283801405e-05, + "loss": 0.31463146209716797, + "step": 1410 + }, + { + "epoch": 0.6973928086000247, + "grad_norm": 1.226099759538899, + "learning_rate": 1.5518707427565146e-05, + "loss": 0.3598940372467041, + "step": 1411 + }, + { + "epoch": 0.6978870628938589, + 
"grad_norm": 1.149083094787804, + "learning_rate": 1.5511891887118665e-05, + "loss": 0.32980066537857056, + "step": 1412 + }, + { + "epoch": 0.698381317187693, + "grad_norm": 1.1872142618250514, + "learning_rate": 1.5505072667011915e-05, + "loss": 0.3264961242675781, + "step": 1413 + }, + { + "epoch": 0.6988755714815272, + "grad_norm": 1.0604770012284015, + "learning_rate": 1.549824977179731e-05, + "loss": 0.3355519771575928, + "step": 1414 + }, + { + "epoch": 0.6993698257753614, + "grad_norm": 1.0119765938601295, + "learning_rate": 1.5491423206029717e-05, + "loss": 0.27073174715042114, + "step": 1415 + }, + { + "epoch": 0.6998640800691956, + "grad_norm": 1.1356545279602395, + "learning_rate": 1.5484592974266456e-05, + "loss": 0.32638323307037354, + "step": 1416 + }, + { + "epoch": 0.7003583343630297, + "grad_norm": 1.192307972564017, + "learning_rate": 1.5477759081067288e-05, + "loss": 0.38844019174575806, + "step": 1417 + }, + { + "epoch": 0.700852588656864, + "grad_norm": 1.1060104448967631, + "learning_rate": 1.5470921530994426e-05, + "loss": 0.3386498689651489, + "step": 1418 + }, + { + "epoch": 0.7013468429506982, + "grad_norm": 1.113333245203903, + "learning_rate": 1.5464080328612522e-05, + "loss": 0.3304392993450165, + "step": 1419 + }, + { + "epoch": 0.7018410972445324, + "grad_norm": 1.1024158772042199, + "learning_rate": 1.545723547848866e-05, + "loss": 0.314837247133255, + "step": 1420 + }, + { + "epoch": 0.7023353515383665, + "grad_norm": 0.9888192419219921, + "learning_rate": 1.5450386985192368e-05, + "loss": 0.30135127902030945, + "step": 1421 + }, + { + "epoch": 0.7028296058322007, + "grad_norm": 1.0640354824874358, + "learning_rate": 1.5443534853295602e-05, + "loss": 0.29176798462867737, + "step": 1422 + }, + { + "epoch": 0.7033238601260349, + "grad_norm": 1.3021824252266967, + "learning_rate": 1.5436679087372746e-05, + "loss": 0.36438125371932983, + "step": 1423 + }, + { + "epoch": 0.703818114419869, + "grad_norm": 1.1147780995478658, + 
"learning_rate": 1.542981969200061e-05, + "loss": 0.37140434980392456, + "step": 1424 + }, + { + "epoch": 0.7043123687137032, + "grad_norm": 1.3176538326023695, + "learning_rate": 1.542295667175843e-05, + "loss": 0.36072903871536255, + "step": 1425 + }, + { + "epoch": 0.7048066230075374, + "grad_norm": 1.1262882885574772, + "learning_rate": 1.5416090031227868e-05, + "loss": 0.3266327977180481, + "step": 1426 + }, + { + "epoch": 0.7053008773013716, + "grad_norm": 1.0179565917308762, + "learning_rate": 1.5409219774992978e-05, + "loss": 0.3081423342227936, + "step": 1427 + }, + { + "epoch": 0.7057951315952057, + "grad_norm": 1.3034313694807904, + "learning_rate": 1.5402345907640262e-05, + "loss": 0.3571197986602783, + "step": 1428 + }, + { + "epoch": 0.7062893858890399, + "grad_norm": 1.1385888315844002, + "learning_rate": 1.5395468433758604e-05, + "loss": 0.32380104064941406, + "step": 1429 + }, + { + "epoch": 0.7067836401828741, + "grad_norm": 1.0129718670355197, + "learning_rate": 1.5388587357939313e-05, + "loss": 0.33777546882629395, + "step": 1430 + }, + { + "epoch": 0.7072778944767083, + "grad_norm": 1.0997780610685683, + "learning_rate": 1.5381702684776093e-05, + "loss": 0.31793370842933655, + "step": 1431 + }, + { + "epoch": 0.7077721487705424, + "grad_norm": 1.065324744616134, + "learning_rate": 1.537481441886506e-05, + "loss": 0.3282355070114136, + "step": 1432 + }, + { + "epoch": 0.7082664030643766, + "grad_norm": 1.1740655706878367, + "learning_rate": 1.5367922564804716e-05, + "loss": 0.3523057699203491, + "step": 1433 + }, + { + "epoch": 0.7087606573582108, + "grad_norm": 1.1790295388685894, + "learning_rate": 1.5361027127195964e-05, + "loss": 0.36351460218429565, + "step": 1434 + }, + { + "epoch": 0.709254911652045, + "grad_norm": 2.2339320260763373, + "learning_rate": 1.5354128110642102e-05, + "loss": 0.2936401963233948, + "step": 1435 + }, + { + "epoch": 0.7097491659458791, + "grad_norm": 1.1080576186798932, + "learning_rate": 1.5347225519748818e-05, + 
"loss": 0.3178175091743469, + "step": 1436 + }, + { + "epoch": 0.7102434202397133, + "grad_norm": 1.1375761171495609, + "learning_rate": 1.5340319359124177e-05, + "loss": 0.3098832666873932, + "step": 1437 + }, + { + "epoch": 0.7107376745335475, + "grad_norm": 0.951807024133746, + "learning_rate": 1.5333409633378633e-05, + "loss": 0.2644941806793213, + "step": 1438 + }, + { + "epoch": 0.7112319288273817, + "grad_norm": 1.1193499530101132, + "learning_rate": 1.5326496347125027e-05, + "loss": 0.3046286702156067, + "step": 1439 + }, + { + "epoch": 0.7117261831212158, + "grad_norm": 1.1009971048909013, + "learning_rate": 1.5319579504978567e-05, + "loss": 0.33757925033569336, + "step": 1440 + }, + { + "epoch": 0.71222043741505, + "grad_norm": 1.1415644120008137, + "learning_rate": 1.5312659111556832e-05, + "loss": 0.3470202684402466, + "step": 1441 + }, + { + "epoch": 0.7127146917088842, + "grad_norm": 1.0829483976260892, + "learning_rate": 1.5305735171479785e-05, + "loss": 0.3310868740081787, + "step": 1442 + }, + { + "epoch": 0.7132089460027184, + "grad_norm": 1.2738694792524405, + "learning_rate": 1.529880768936975e-05, + "loss": 0.31649407744407654, + "step": 1443 + }, + { + "epoch": 0.7137032002965525, + "grad_norm": 1.0510301649062292, + "learning_rate": 1.5291876669851408e-05, + "loss": 0.2986135184764862, + "step": 1444 + }, + { + "epoch": 0.7141974545903867, + "grad_norm": 1.1622525691797543, + "learning_rate": 1.5284942117551817e-05, + "loss": 0.3033408224582672, + "step": 1445 + }, + { + "epoch": 0.714691708884221, + "grad_norm": 1.1648719329133883, + "learning_rate": 1.5278004037100378e-05, + "loss": 0.34231680631637573, + "step": 1446 + }, + { + "epoch": 0.7151859631780552, + "grad_norm": 1.1347301204641653, + "learning_rate": 1.5271062433128857e-05, + "loss": 0.3273579478263855, + "step": 1447 + }, + { + "epoch": 0.7156802174718893, + "grad_norm": 1.2307292916383785, + "learning_rate": 1.5264117310271372e-05, + "loss": 0.344064861536026, + "step": 1448 + 
}, + { + "epoch": 0.7161744717657235, + "grad_norm": 1.0685505855741966, + "learning_rate": 1.5257168673164384e-05, + "loss": 0.3131038546562195, + "step": 1449 + }, + { + "epoch": 0.7166687260595577, + "grad_norm": 1.1403948273488542, + "learning_rate": 1.5250216526446708e-05, + "loss": 0.32794755697250366, + "step": 1450 + }, + { + "epoch": 0.7171629803533918, + "grad_norm": 1.2597097116316462, + "learning_rate": 1.5243260874759494e-05, + "loss": 0.3633842468261719, + "step": 1451 + }, + { + "epoch": 0.717657234647226, + "grad_norm": 0.943013995379639, + "learning_rate": 1.5236301722746235e-05, + "loss": 0.24650251865386963, + "step": 1452 + }, + { + "epoch": 0.7181514889410602, + "grad_norm": 1.1777840335640666, + "learning_rate": 1.5229339075052769e-05, + "loss": 0.34167230129241943, + "step": 1453 + }, + { + "epoch": 0.7186457432348944, + "grad_norm": 1.0945051908887762, + "learning_rate": 1.522237293632725e-05, + "loss": 0.29454126954078674, + "step": 1454 + }, + { + "epoch": 0.7191399975287285, + "grad_norm": 1.1517995676673816, + "learning_rate": 1.5215403311220178e-05, + "loss": 0.3709314465522766, + "step": 1455 + }, + { + "epoch": 0.7196342518225627, + "grad_norm": 1.1421076533752808, + "learning_rate": 1.5208430204384377e-05, + "loss": 0.3543916642665863, + "step": 1456 + }, + { + "epoch": 0.7201285061163969, + "grad_norm": 1.1924648010793302, + "learning_rate": 1.5201453620474986e-05, + "loss": 0.33827707171440125, + "step": 1457 + }, + { + "epoch": 0.7206227604102311, + "grad_norm": 1.1616070041381745, + "learning_rate": 1.5194473564149484e-05, + "loss": 0.31289514899253845, + "step": 1458 + }, + { + "epoch": 0.7211170147040652, + "grad_norm": 1.1655875507968474, + "learning_rate": 1.5187490040067646e-05, + "loss": 0.3345657289028168, + "step": 1459 + }, + { + "epoch": 0.7216112689978994, + "grad_norm": 1.091971369166992, + "learning_rate": 1.5180503052891578e-05, + "loss": 0.3322404623031616, + "step": 1460 + }, + { + "epoch": 0.7221055232917336, + 
"grad_norm": 1.0009476128919939, + "learning_rate": 1.5173512607285692e-05, + "loss": 0.31120461225509644, + "step": 1461 + }, + { + "epoch": 0.7225997775855678, + "grad_norm": 1.140979323325151, + "learning_rate": 1.5166518707916714e-05, + "loss": 0.3388645648956299, + "step": 1462 + }, + { + "epoch": 0.7230940318794019, + "grad_norm": 1.098469502784105, + "learning_rate": 1.5159521359453661e-05, + "loss": 0.3048557639122009, + "step": 1463 + }, + { + "epoch": 0.7235882861732361, + "grad_norm": 1.0437743408474436, + "learning_rate": 1.5152520566567873e-05, + "loss": 0.32128047943115234, + "step": 1464 + }, + { + "epoch": 0.7240825404670703, + "grad_norm": 1.0754519434907805, + "learning_rate": 1.5145516333932973e-05, + "loss": 0.3016900420188904, + "step": 1465 + }, + { + "epoch": 0.7245767947609045, + "grad_norm": 0.9730419604339762, + "learning_rate": 1.5138508666224892e-05, + "loss": 0.27410340309143066, + "step": 1466 + }, + { + "epoch": 0.7250710490547386, + "grad_norm": 1.1548137674896846, + "learning_rate": 1.513149756812184e-05, + "loss": 0.314311146736145, + "step": 1467 + }, + { + "epoch": 0.7255653033485728, + "grad_norm": 1.0652992161056178, + "learning_rate": 1.5124483044304339e-05, + "loss": 0.300488144159317, + "step": 1468 + }, + { + "epoch": 0.726059557642407, + "grad_norm": 1.0437811199768454, + "learning_rate": 1.5117465099455173e-05, + "loss": 0.2610424757003784, + "step": 1469 + }, + { + "epoch": 0.7265538119362412, + "grad_norm": 1.0473843452456588, + "learning_rate": 1.5110443738259425e-05, + "loss": 0.2631368637084961, + "step": 1470 + }, + { + "epoch": 0.7270480662300753, + "grad_norm": 1.1572872923696271, + "learning_rate": 1.510341896540446e-05, + "loss": 0.2894716262817383, + "step": 1471 + }, + { + "epoch": 0.7275423205239095, + "grad_norm": 1.1539682565039295, + "learning_rate": 1.5096390785579913e-05, + "loss": 0.2859206199645996, + "step": 1472 + }, + { + "epoch": 0.7280365748177438, + "grad_norm": 1.1861776477785995, + 
"learning_rate": 1.5089359203477693e-05, + "loss": 0.2966008484363556, + "step": 1473 + }, + { + "epoch": 0.728530829111578, + "grad_norm": 1.0911088494470613, + "learning_rate": 1.5082324223791988e-05, + "loss": 0.3187675476074219, + "step": 1474 + }, + { + "epoch": 0.729025083405412, + "grad_norm": 1.1920802680772398, + "learning_rate": 1.507528585121925e-05, + "loss": 0.32434171438217163, + "step": 1475 + }, + { + "epoch": 0.7295193376992463, + "grad_norm": 1.233732485912319, + "learning_rate": 1.5068244090458197e-05, + "loss": 0.3518364429473877, + "step": 1476 + }, + { + "epoch": 0.7300135919930805, + "grad_norm": 1.091189612496036, + "learning_rate": 1.50611989462098e-05, + "loss": 0.32294291257858276, + "step": 1477 + }, + { + "epoch": 0.7305078462869146, + "grad_norm": 1.184027940449126, + "learning_rate": 1.5054150423177307e-05, + "loss": 0.3413415253162384, + "step": 1478 + }, + { + "epoch": 0.7310021005807488, + "grad_norm": 1.1760745568840743, + "learning_rate": 1.5047098526066207e-05, + "loss": 0.3562566637992859, + "step": 1479 + }, + { + "epoch": 0.731496354874583, + "grad_norm": 1.130494844464842, + "learning_rate": 1.504004325958424e-05, + "loss": 0.30018410086631775, + "step": 1480 + }, + { + "epoch": 0.7319906091684172, + "grad_norm": 1.027268124102698, + "learning_rate": 1.5032984628441409e-05, + "loss": 0.2937701344490051, + "step": 1481 + }, + { + "epoch": 0.7324848634622513, + "grad_norm": 1.131154387943882, + "learning_rate": 1.5025922637349953e-05, + "loss": 0.3268740773200989, + "step": 1482 + }, + { + "epoch": 0.7329791177560855, + "grad_norm": 1.053089747814938, + "learning_rate": 1.5018857291024356e-05, + "loss": 0.3246314525604248, + "step": 1483 + }, + { + "epoch": 0.7334733720499197, + "grad_norm": 1.033026683314433, + "learning_rate": 1.501178859418134e-05, + "loss": 0.276904433965683, + "step": 1484 + }, + { + "epoch": 0.7339676263437539, + "grad_norm": 1.1901915790154476, + "learning_rate": 1.5004716551539873e-05, + "loss": 
0.27665287256240845, + "step": 1485 + }, + { + "epoch": 0.734461880637588, + "grad_norm": 1.065690181516995, + "learning_rate": 1.4997641167821143e-05, + "loss": 0.325985848903656, + "step": 1486 + }, + { + "epoch": 0.7349561349314222, + "grad_norm": 1.2333398180696593, + "learning_rate": 1.4990562447748573e-05, + "loss": 0.2951817214488983, + "step": 1487 + }, + { + "epoch": 0.7354503892252564, + "grad_norm": 1.0415622998394476, + "learning_rate": 1.4983480396047822e-05, + "loss": 0.2592772841453552, + "step": 1488 + }, + { + "epoch": 0.7359446435190906, + "grad_norm": 1.0977128928049222, + "learning_rate": 1.4976395017446767e-05, + "loss": 0.3278253674507141, + "step": 1489 + }, + { + "epoch": 0.7364388978129247, + "grad_norm": 2.4840016288238886, + "learning_rate": 1.4969306316675497e-05, + "loss": 0.32366445660591125, + "step": 1490 + }, + { + "epoch": 0.7369331521067589, + "grad_norm": 1.065618785924185, + "learning_rate": 1.4962214298466337e-05, + "loss": 0.30544513463974, + "step": 1491 + }, + { + "epoch": 0.7374274064005931, + "grad_norm": 1.1151764286390358, + "learning_rate": 1.4955118967553812e-05, + "loss": 0.3712898790836334, + "step": 1492 + }, + { + "epoch": 0.7379216606944273, + "grad_norm": 1.072095940180716, + "learning_rate": 1.4948020328674662e-05, + "loss": 0.3006438612937927, + "step": 1493 + }, + { + "epoch": 0.7384159149882614, + "grad_norm": 1.1145573413296936, + "learning_rate": 1.494091838656784e-05, + "loss": 0.3494953215122223, + "step": 1494 + }, + { + "epoch": 0.7389101692820956, + "grad_norm": 1.091824613740768, + "learning_rate": 1.4933813145974504e-05, + "loss": 0.2698785662651062, + "step": 1495 + }, + { + "epoch": 0.7394044235759298, + "grad_norm": 1.1072713673032075, + "learning_rate": 1.4926704611638003e-05, + "loss": 0.34775635600090027, + "step": 1496 + }, + { + "epoch": 0.739898677869764, + "grad_norm": 1.1542085278706422, + "learning_rate": 1.4919592788303898e-05, + "loss": 0.328175812959671, + "step": 1497 + }, + { + 
"epoch": 0.7403929321635981, + "grad_norm": 1.1735161292651393, + "learning_rate": 1.491247768071994e-05, + "loss": 0.3320178687572479, + "step": 1498 + }, + { + "epoch": 0.7408871864574323, + "grad_norm": 1.8687355330582882, + "learning_rate": 1.4905359293636074e-05, + "loss": 0.308150053024292, + "step": 1499 + }, + { + "epoch": 0.7413814407512666, + "grad_norm": 1.1422704685641505, + "learning_rate": 1.489823763180443e-05, + "loss": 0.3311570882797241, + "step": 1500 + }, + { + "epoch": 0.7418756950451008, + "grad_norm": 1.2844910379105308, + "learning_rate": 1.4891112699979334e-05, + "loss": 0.36916327476501465, + "step": 1501 + }, + { + "epoch": 0.7423699493389349, + "grad_norm": 1.0354244070195735, + "learning_rate": 1.4883984502917286e-05, + "loss": 0.28005337715148926, + "step": 1502 + }, + { + "epoch": 0.7428642036327691, + "grad_norm": 1.2241818166146565, + "learning_rate": 1.4876853045376962e-05, + "loss": 0.3502781391143799, + "step": 1503 + }, + { + "epoch": 0.7433584579266033, + "grad_norm": 1.2448349850537428, + "learning_rate": 1.4869718332119232e-05, + "loss": 0.32032880187034607, + "step": 1504 + }, + { + "epoch": 0.7438527122204374, + "grad_norm": 1.1236679189592251, + "learning_rate": 1.4862580367907118e-05, + "loss": 0.3229472041130066, + "step": 1505 + }, + { + "epoch": 0.7443469665142716, + "grad_norm": 1.087360074547477, + "learning_rate": 1.4855439157505833e-05, + "loss": 0.2725368142127991, + "step": 1506 + }, + { + "epoch": 0.7448412208081058, + "grad_norm": 1.2509876854452482, + "learning_rate": 1.4848294705682737e-05, + "loss": 0.35358861088752747, + "step": 1507 + }, + { + "epoch": 0.74533547510194, + "grad_norm": 1.0843196708603702, + "learning_rate": 1.4841147017207376e-05, + "loss": 0.299206018447876, + "step": 1508 + }, + { + "epoch": 0.7458297293957741, + "grad_norm": 2.7618594064377384, + "learning_rate": 1.4833996096851432e-05, + "loss": 0.32004314661026, + "step": 1509 + }, + { + "epoch": 0.7463239836896083, + "grad_norm": 
1.1399779760270892, + "learning_rate": 1.4826841949388767e-05, + "loss": 0.32800590991973877, + "step": 1510 + }, + { + "epoch": 0.7468182379834425, + "grad_norm": 1.112132363505793, + "learning_rate": 1.4819684579595382e-05, + "loss": 0.2916460335254669, + "step": 1511 + }, + { + "epoch": 0.7473124922772767, + "grad_norm": 1.2041472096070427, + "learning_rate": 1.4812523992249437e-05, + "loss": 0.3276118338108063, + "step": 1512 + }, + { + "epoch": 0.7478067465711108, + "grad_norm": 1.2310079375510266, + "learning_rate": 1.4805360192131234e-05, + "loss": 0.34718069434165955, + "step": 1513 + }, + { + "epoch": 0.748301000864945, + "grad_norm": 1.0130113878676084, + "learning_rate": 1.4798193184023233e-05, + "loss": 0.2810167372226715, + "step": 1514 + }, + { + "epoch": 0.7487952551587792, + "grad_norm": 1.1600230287701154, + "learning_rate": 1.4791022972710017e-05, + "loss": 0.3542296886444092, + "step": 1515 + }, + { + "epoch": 0.7492895094526134, + "grad_norm": 1.0717623685966582, + "learning_rate": 1.4783849562978319e-05, + "loss": 0.27578431367874146, + "step": 1516 + }, + { + "epoch": 0.7497837637464475, + "grad_norm": 1.2193919844014014, + "learning_rate": 1.4776672959617006e-05, + "loss": 0.32235798239707947, + "step": 1517 + }, + { + "epoch": 0.7502780180402817, + "grad_norm": 1.073591922439447, + "learning_rate": 1.4769493167417079e-05, + "loss": 0.30588477849960327, + "step": 1518 + }, + { + "epoch": 0.7507722723341159, + "grad_norm": 1.1259837125407774, + "learning_rate": 1.4762310191171657e-05, + "loss": 0.31242361664772034, + "step": 1519 + }, + { + "epoch": 0.7512665266279501, + "grad_norm": 1.2265290610094162, + "learning_rate": 1.4755124035675995e-05, + "loss": 0.3679526150226593, + "step": 1520 + }, + { + "epoch": 0.7517607809217842, + "grad_norm": 1.0185674037419847, + "learning_rate": 1.4747934705727473e-05, + "loss": 0.28588515520095825, + "step": 1521 + }, + { + "epoch": 0.7522550352156184, + "grad_norm": 1.0624456882482982, + "learning_rate": 
1.4740742206125582e-05, + "loss": 0.29861775040626526, + "step": 1522 + }, + { + "epoch": 0.7527492895094526, + "grad_norm": 1.1245071890104912, + "learning_rate": 1.4733546541671928e-05, + "loss": 0.31373754143714905, + "step": 1523 + }, + { + "epoch": 0.7532435438032868, + "grad_norm": 1.1569601569555032, + "learning_rate": 1.472634771717024e-05, + "loss": 0.3127061128616333, + "step": 1524 + }, + { + "epoch": 0.7537377980971209, + "grad_norm": 1.0554556810771654, + "learning_rate": 1.4719145737426346e-05, + "loss": 0.33681541681289673, + "step": 1525 + }, + { + "epoch": 0.7542320523909551, + "grad_norm": 1.1202634511050926, + "learning_rate": 1.4711940607248182e-05, + "loss": 0.30266639590263367, + "step": 1526 + }, + { + "epoch": 0.7547263066847893, + "grad_norm": 1.0915134711866425, + "learning_rate": 1.47047323314458e-05, + "loss": 0.2988300323486328, + "step": 1527 + }, + { + "epoch": 0.7552205609786236, + "grad_norm": 1.1041853232471737, + "learning_rate": 1.4697520914831334e-05, + "loss": 0.32679620385169983, + "step": 1528 + }, + { + "epoch": 0.7557148152724577, + "grad_norm": 1.0049846597819565, + "learning_rate": 1.4690306362219024e-05, + "loss": 0.2935605049133301, + "step": 1529 + }, + { + "epoch": 0.7562090695662919, + "grad_norm": 1.1114952379308272, + "learning_rate": 1.4683088678425204e-05, + "loss": 0.303417831659317, + "step": 1530 + }, + { + "epoch": 0.7567033238601261, + "grad_norm": 1.0605597139601082, + "learning_rate": 1.4675867868268295e-05, + "loss": 0.30822527408599854, + "step": 1531 + }, + { + "epoch": 0.7571975781539602, + "grad_norm": 1.0772522309630048, + "learning_rate": 1.4668643936568807e-05, + "loss": 0.3104674220085144, + "step": 1532 + }, + { + "epoch": 0.7576918324477944, + "grad_norm": 1.0598818436947175, + "learning_rate": 1.4661416888149333e-05, + "loss": 0.27899307012557983, + "step": 1533 + }, + { + "epoch": 0.7581860867416286, + "grad_norm": 1.1291791785743877, + "learning_rate": 1.465418672783455e-05, + "loss": 
0.3285380005836487, + "step": 1534 + }, + { + "epoch": 0.7586803410354628, + "grad_norm": 1.0773746767557166, + "learning_rate": 1.4646953460451205e-05, + "loss": 0.32028889656066895, + "step": 1535 + }, + { + "epoch": 0.7591745953292969, + "grad_norm": 1.2647242329167074, + "learning_rate": 1.4639717090828127e-05, + "loss": 0.29870709776878357, + "step": 1536 + }, + { + "epoch": 0.7596688496231311, + "grad_norm": 1.1833149129368068, + "learning_rate": 1.4632477623796216e-05, + "loss": 0.3556699752807617, + "step": 1537 + }, + { + "epoch": 0.7601631039169653, + "grad_norm": 1.158341046754784, + "learning_rate": 1.462523506418843e-05, + "loss": 0.3433789014816284, + "step": 1538 + }, + { + "epoch": 0.7606573582107995, + "grad_norm": 1.2526530715160118, + "learning_rate": 1.4617989416839802e-05, + "loss": 0.3146114945411682, + "step": 1539 + }, + { + "epoch": 0.7611516125046336, + "grad_norm": 1.0254798742920868, + "learning_rate": 1.4610740686587424e-05, + "loss": 0.29029202461242676, + "step": 1540 + }, + { + "epoch": 0.7616458667984678, + "grad_norm": 1.2367212561484746, + "learning_rate": 1.4603488878270442e-05, + "loss": 0.2976688742637634, + "step": 1541 + }, + { + "epoch": 0.762140121092302, + "grad_norm": 1.0384933941129642, + "learning_rate": 1.459623399673006e-05, + "loss": 0.28604352474212646, + "step": 1542 + }, + { + "epoch": 0.7626343753861362, + "grad_norm": 1.2070329997652125, + "learning_rate": 1.4588976046809536e-05, + "loss": 0.34977301955223083, + "step": 1543 + }, + { + "epoch": 0.7631286296799703, + "grad_norm": 1.1108699937366455, + "learning_rate": 1.458171503335417e-05, + "loss": 0.31592974066734314, + "step": 1544 + }, + { + "epoch": 0.7636228839738045, + "grad_norm": 1.224645404968216, + "learning_rate": 1.4574450961211312e-05, + "loss": 0.31539830565452576, + "step": 1545 + }, + { + "epoch": 0.7641171382676387, + "grad_norm": 1.1914008033212045, + "learning_rate": 1.4567183835230355e-05, + "loss": 0.3100752532482147, + "step": 1546 + }, + 
{ + "epoch": 0.7646113925614729, + "grad_norm": 1.1973069016485758, + "learning_rate": 1.4559913660262726e-05, + "loss": 0.31005364656448364, + "step": 1547 + }, + { + "epoch": 0.765105646855307, + "grad_norm": 1.102020410207535, + "learning_rate": 1.4552640441161889e-05, + "loss": 0.3050577640533447, + "step": 1548 + }, + { + "epoch": 0.7655999011491412, + "grad_norm": 1.1151715417212549, + "learning_rate": 1.4545364182783343e-05, + "loss": 0.294721394777298, + "step": 1549 + }, + { + "epoch": 0.7660941554429754, + "grad_norm": 1.0907882805879732, + "learning_rate": 1.4538084889984616e-05, + "loss": 0.2974075376987457, + "step": 1550 + }, + { + "epoch": 0.7665884097368096, + "grad_norm": 1.218483256285566, + "learning_rate": 1.4530802567625259e-05, + "loss": 0.3247089385986328, + "step": 1551 + }, + { + "epoch": 0.7670826640306437, + "grad_norm": 1.1751647980540385, + "learning_rate": 1.4523517220566843e-05, + "loss": 0.3219151198863983, + "step": 1552 + }, + { + "epoch": 0.7675769183244779, + "grad_norm": 1.092743254793347, + "learning_rate": 1.4516228853672962e-05, + "loss": 0.30580246448516846, + "step": 1553 + }, + { + "epoch": 0.7680711726183121, + "grad_norm": 1.0670412493946726, + "learning_rate": 1.4508937471809233e-05, + "loss": 0.2983207702636719, + "step": 1554 + }, + { + "epoch": 0.7685654269121464, + "grad_norm": 1.1155603134808716, + "learning_rate": 1.4501643079843266e-05, + "loss": 0.3429039418697357, + "step": 1555 + }, + { + "epoch": 0.7690596812059804, + "grad_norm": 1.0600298870014666, + "learning_rate": 1.4494345682644704e-05, + "loss": 0.3055192530155182, + "step": 1556 + }, + { + "epoch": 0.7695539354998147, + "grad_norm": 1.0843598968647987, + "learning_rate": 1.4487045285085178e-05, + "loss": 0.2964102327823639, + "step": 1557 + }, + { + "epoch": 0.7700481897936489, + "grad_norm": 1.0436581793993642, + "learning_rate": 1.4479741892038335e-05, + "loss": 0.3088444471359253, + "step": 1558 + }, + { + "epoch": 0.770542444087483, + "grad_norm": 
1.070280126063037, + "learning_rate": 1.4472435508379808e-05, + "loss": 0.28697890043258667, + "step": 1559 + }, + { + "epoch": 0.7710366983813172, + "grad_norm": 1.1055317673748768, + "learning_rate": 1.4465126138987242e-05, + "loss": 0.3664681315422058, + "step": 1560 + }, + { + "epoch": 0.7715309526751514, + "grad_norm": 1.1042702127280148, + "learning_rate": 1.4457813788740263e-05, + "loss": 0.3282932937145233, + "step": 1561 + }, + { + "epoch": 0.7720252069689856, + "grad_norm": 1.4857133307558297, + "learning_rate": 1.4450498462520495e-05, + "loss": 0.27597576379776, + "step": 1562 + }, + { + "epoch": 0.7725194612628197, + "grad_norm": 1.2214452597170176, + "learning_rate": 1.4443180165211541e-05, + "loss": 0.3553946614265442, + "step": 1563 + }, + { + "epoch": 0.7730137155566539, + "grad_norm": 1.1827716129984904, + "learning_rate": 1.4435858901698995e-05, + "loss": 0.36224859952926636, + "step": 1564 + }, + { + "epoch": 0.7735079698504881, + "grad_norm": 1.1341836510498036, + "learning_rate": 1.4428534676870427e-05, + "loss": 0.2940914034843445, + "step": 1565 + }, + { + "epoch": 0.7740022241443223, + "grad_norm": 0.9563512574257287, + "learning_rate": 1.4421207495615385e-05, + "loss": 0.2717741131782532, + "step": 1566 + }, + { + "epoch": 0.7744964784381564, + "grad_norm": 1.1327871067959112, + "learning_rate": 1.441387736282539e-05, + "loss": 0.32340431213378906, + "step": 1567 + }, + { + "epoch": 0.7749907327319906, + "grad_norm": 1.1090264087970254, + "learning_rate": 1.4406544283393935e-05, + "loss": 0.3080120086669922, + "step": 1568 + }, + { + "epoch": 0.7754849870258248, + "grad_norm": 1.4441577426158039, + "learning_rate": 1.4399208262216475e-05, + "loss": 0.3118380308151245, + "step": 1569 + }, + { + "epoch": 0.775979241319659, + "grad_norm": 1.3307213271784917, + "learning_rate": 1.439186930419044e-05, + "loss": 0.3086084723472595, + "step": 1570 + }, + { + "epoch": 0.7764734956134931, + "grad_norm": 1.1593176371811458, + "learning_rate": 
1.438452741421521e-05, + "loss": 0.3233364522457123, + "step": 1571 + }, + { + "epoch": 0.7769677499073273, + "grad_norm": 1.0623874748102813, + "learning_rate": 1.4377182597192124e-05, + "loss": 0.29029640555381775, + "step": 1572 + }, + { + "epoch": 0.7774620042011615, + "grad_norm": 0.9791711244739897, + "learning_rate": 1.4369834858024476e-05, + "loss": 0.2888006567955017, + "step": 1573 + }, + { + "epoch": 0.7779562584949957, + "grad_norm": 1.1118016172702438, + "learning_rate": 1.4362484201617519e-05, + "loss": 0.3260151743888855, + "step": 1574 + }, + { + "epoch": 0.7784505127888298, + "grad_norm": 1.3306536044832058, + "learning_rate": 1.4355130632878439e-05, + "loss": 0.333207905292511, + "step": 1575 + }, + { + "epoch": 0.778944767082664, + "grad_norm": 1.0844273121477916, + "learning_rate": 1.4347774156716375e-05, + "loss": 0.2577935457229614, + "step": 1576 + }, + { + "epoch": 0.7794390213764982, + "grad_norm": 1.0777103823564191, + "learning_rate": 1.434041477804241e-05, + "loss": 0.29645979404449463, + "step": 1577 + }, + { + "epoch": 0.7799332756703324, + "grad_norm": 1.1743796307407597, + "learning_rate": 1.433305250176955e-05, + "loss": 0.2973156273365021, + "step": 1578 + }, + { + "epoch": 0.7804275299641665, + "grad_norm": 1.0277241805983874, + "learning_rate": 1.4325687332812754e-05, + "loss": 0.29159975051879883, + "step": 1579 + }, + { + "epoch": 0.7809217842580007, + "grad_norm": 1.1751334806332727, + "learning_rate": 1.4318319276088902e-05, + "loss": 0.29718664288520813, + "step": 1580 + }, + { + "epoch": 0.781416038551835, + "grad_norm": 1.316577919508971, + "learning_rate": 1.4310948336516803e-05, + "loss": 0.3262369632720947, + "step": 1581 + }, + { + "epoch": 0.781910292845669, + "grad_norm": 1.182680350644687, + "learning_rate": 1.4303574519017187e-05, + "loss": 0.36491623520851135, + "step": 1582 + }, + { + "epoch": 0.7824045471395032, + "grad_norm": 1.181580153295467, + "learning_rate": 1.4296197828512716e-05, + "loss": 
0.3558582365512848, + "step": 1583 + }, + { + "epoch": 0.7828988014333375, + "grad_norm": 0.9802630700834107, + "learning_rate": 1.428881826992796e-05, + "loss": 0.2745930552482605, + "step": 1584 + }, + { + "epoch": 0.7833930557271717, + "grad_norm": 1.1668091765691224, + "learning_rate": 1.4281435848189404e-05, + "loss": 0.3239384889602661, + "step": 1585 + }, + { + "epoch": 0.7838873100210058, + "grad_norm": 1.0164738185404556, + "learning_rate": 1.4274050568225452e-05, + "loss": 0.2708761692047119, + "step": 1586 + }, + { + "epoch": 0.78438156431484, + "grad_norm": 1.2356501028179845, + "learning_rate": 1.4266662434966412e-05, + "loss": 0.3633013963699341, + "step": 1587 + }, + { + "epoch": 0.7848758186086742, + "grad_norm": 1.2145151160613337, + "learning_rate": 1.425927145334449e-05, + "loss": 0.36411651968955994, + "step": 1588 + }, + { + "epoch": 0.7853700729025084, + "grad_norm": 1.2093753197442545, + "learning_rate": 1.4251877628293804e-05, + "loss": 0.3120966851711273, + "step": 1589 + }, + { + "epoch": 0.7858643271963425, + "grad_norm": 1.111474907013162, + "learning_rate": 1.4244480964750365e-05, + "loss": 0.32788634300231934, + "step": 1590 + }, + { + "epoch": 0.7863585814901767, + "grad_norm": 1.1320230499507122, + "learning_rate": 1.423708146765208e-05, + "loss": 0.2919159233570099, + "step": 1591 + }, + { + "epoch": 0.7868528357840109, + "grad_norm": 1.1271090926469096, + "learning_rate": 1.4229679141938749e-05, + "loss": 0.3135683834552765, + "step": 1592 + }, + { + "epoch": 0.7873470900778451, + "grad_norm": 1.2447784007425877, + "learning_rate": 1.4222273992552058e-05, + "loss": 0.351981520652771, + "step": 1593 + }, + { + "epoch": 0.7878413443716792, + "grad_norm": 1.1846979202846248, + "learning_rate": 1.4214866024435576e-05, + "loss": 0.3615785837173462, + "step": 1594 + }, + { + "epoch": 0.7883355986655134, + "grad_norm": 1.1632616021817466, + "learning_rate": 1.420745524253476e-05, + "loss": 0.29399484395980835, + "step": 1595 + }, + { + 
"epoch": 0.7888298529593476, + "grad_norm": 1.1714512606078011, + "learning_rate": 1.420004165179694e-05, + "loss": 0.30501872301101685, + "step": 1596 + }, + { + "epoch": 0.7893241072531818, + "grad_norm": 1.1172632404953093, + "learning_rate": 1.4192625257171331e-05, + "loss": 0.33745667338371277, + "step": 1597 + }, + { + "epoch": 0.7898183615470159, + "grad_norm": 0.994693525988225, + "learning_rate": 1.4185206063609e-05, + "loss": 0.2675662934780121, + "step": 1598 + }, + { + "epoch": 0.7903126158408501, + "grad_norm": 1.022107075414073, + "learning_rate": 1.41777840760629e-05, + "loss": 0.295659601688385, + "step": 1599 + }, + { + "epoch": 0.7908068701346843, + "grad_norm": 1.119079517603524, + "learning_rate": 1.4170359299487848e-05, + "loss": 0.3164275586605072, + "step": 1600 + }, + { + "epoch": 0.7913011244285185, + "grad_norm": 1.0695885495482724, + "learning_rate": 1.416293173884051e-05, + "loss": 0.3039100766181946, + "step": 1601 + }, + { + "epoch": 0.7917953787223526, + "grad_norm": 1.1080665801372258, + "learning_rate": 1.4155501399079427e-05, + "loss": 0.2994040846824646, + "step": 1602 + }, + { + "epoch": 0.7922896330161868, + "grad_norm": 1.3291271745996591, + "learning_rate": 1.4148068285164984e-05, + "loss": 0.3129369616508484, + "step": 1603 + }, + { + "epoch": 0.792783887310021, + "grad_norm": 1.084724718149673, + "learning_rate": 1.4140632402059424e-05, + "loss": 0.3223167657852173, + "step": 1604 + }, + { + "epoch": 0.7932781416038552, + "grad_norm": 1.0882285752839331, + "learning_rate": 1.4133193754726834e-05, + "loss": 0.2734811305999756, + "step": 1605 + }, + { + "epoch": 0.7937723958976893, + "grad_norm": 1.103029405529104, + "learning_rate": 1.4125752348133148e-05, + "loss": 0.27474087476730347, + "step": 1606 + }, + { + "epoch": 0.7942666501915235, + "grad_norm": 1.0487344928171054, + "learning_rate": 1.4118308187246145e-05, + "loss": 0.2619907557964325, + "step": 1607 + }, + { + "epoch": 0.7947609044853577, + "grad_norm": 
1.1981880636142406, + "learning_rate": 1.411086127703544e-05, + "loss": 0.3176937699317932, + "step": 1608 + }, + { + "epoch": 0.7952551587791918, + "grad_norm": 1.12323060393325, + "learning_rate": 1.4103411622472483e-05, + "loss": 0.28044235706329346, + "step": 1609 + }, + { + "epoch": 0.795749413073026, + "grad_norm": 1.0360499320558048, + "learning_rate": 1.409595922853056e-05, + "loss": 0.27778196334838867, + "step": 1610 + }, + { + "epoch": 0.7962436673668603, + "grad_norm": 1.2703211339383462, + "learning_rate": 1.4088504100184777e-05, + "loss": 0.3168628513813019, + "step": 1611 + }, + { + "epoch": 0.7967379216606945, + "grad_norm": 1.1557608708585085, + "learning_rate": 1.4081046242412075e-05, + "loss": 0.30454084277153015, + "step": 1612 + }, + { + "epoch": 0.7972321759545286, + "grad_norm": 1.169123128871501, + "learning_rate": 1.4073585660191214e-05, + "loss": 0.34019169211387634, + "step": 1613 + }, + { + "epoch": 0.7977264302483628, + "grad_norm": 1.173717391982327, + "learning_rate": 1.4066122358502772e-05, + "loss": 0.3044774830341339, + "step": 1614 + }, + { + "epoch": 0.798220684542197, + "grad_norm": 1.1570346377203322, + "learning_rate": 1.4058656342329136e-05, + "loss": 0.3181847333908081, + "step": 1615 + }, + { + "epoch": 0.7987149388360312, + "grad_norm": 1.249158616205248, + "learning_rate": 1.405118761665452e-05, + "loss": 0.3400845229625702, + "step": 1616 + }, + { + "epoch": 0.7992091931298653, + "grad_norm": 1.2103435711338524, + "learning_rate": 1.4043716186464935e-05, + "loss": 0.2845221161842346, + "step": 1617 + }, + { + "epoch": 0.7997034474236995, + "grad_norm": 1.060854004382088, + "learning_rate": 1.4036242056748202e-05, + "loss": 0.27315276861190796, + "step": 1618 + }, + { + "epoch": 0.8001977017175337, + "grad_norm": 1.2994888590220768, + "learning_rate": 1.4028765232493942e-05, + "loss": 0.3388780951499939, + "step": 1619 + }, + { + "epoch": 0.8006919560113679, + "grad_norm": 1.282329812705599, + "learning_rate": 
1.4021285718693581e-05, + "loss": 0.338635116815567, + "step": 1620 + }, + { + "epoch": 0.801186210305202, + "grad_norm": 1.051985157077811, + "learning_rate": 1.4013803520340328e-05, + "loss": 0.26962924003601074, + "step": 1621 + }, + { + "epoch": 0.8016804645990362, + "grad_norm": 1.119736165525956, + "learning_rate": 1.4006318642429194e-05, + "loss": 0.32106393575668335, + "step": 1622 + }, + { + "epoch": 0.8021747188928704, + "grad_norm": 1.1215264874092639, + "learning_rate": 1.399883108995698e-05, + "loss": 0.33063358068466187, + "step": 1623 + }, + { + "epoch": 0.8026689731867046, + "grad_norm": 1.2875541426354853, + "learning_rate": 1.3991340867922266e-05, + "loss": 0.31906163692474365, + "step": 1624 + }, + { + "epoch": 0.8031632274805387, + "grad_norm": 1.0397829646035845, + "learning_rate": 1.3983847981325415e-05, + "loss": 0.2601381242275238, + "step": 1625 + }, + { + "epoch": 0.8036574817743729, + "grad_norm": 1.1557585059548563, + "learning_rate": 1.3976352435168577e-05, + "loss": 0.3342537581920624, + "step": 1626 + }, + { + "epoch": 0.8041517360682071, + "grad_norm": 1.2564737583224261, + "learning_rate": 1.3968854234455669e-05, + "loss": 0.3372059166431427, + "step": 1627 + }, + { + "epoch": 0.8046459903620413, + "grad_norm": 1.1676806235835944, + "learning_rate": 1.3961353384192377e-05, + "loss": 0.31026744842529297, + "step": 1628 + }, + { + "epoch": 0.8051402446558754, + "grad_norm": 1.0921501695742, + "learning_rate": 1.3953849889386173e-05, + "loss": 0.2867652177810669, + "step": 1629 + }, + { + "epoch": 0.8056344989497096, + "grad_norm": 1.1055169200249502, + "learning_rate": 1.3946343755046274e-05, + "loss": 0.29169392585754395, + "step": 1630 + }, + { + "epoch": 0.8061287532435438, + "grad_norm": 1.0753220774925722, + "learning_rate": 1.393883498618367e-05, + "loss": 0.2976510524749756, + "step": 1631 + }, + { + "epoch": 0.806623007537378, + "grad_norm": 1.1387290098549956, + "learning_rate": 1.3931323587811107e-05, + "loss": 
0.2900371551513672, + "step": 1632 + }, + { + "epoch": 0.8071172618312121, + "grad_norm": 1.0560549112494348, + "learning_rate": 1.3923809564943093e-05, + "loss": 0.31660354137420654, + "step": 1633 + }, + { + "epoch": 0.8076115161250463, + "grad_norm": 1.079892158607702, + "learning_rate": 1.3916292922595875e-05, + "loss": 0.3099827468395233, + "step": 1634 + }, + { + "epoch": 0.8081057704188805, + "grad_norm": 1.1450154190444473, + "learning_rate": 1.3908773665787459e-05, + "loss": 0.34322571754455566, + "step": 1635 + }, + { + "epoch": 0.8086000247127146, + "grad_norm": 1.0812992610334402, + "learning_rate": 1.3901251799537592e-05, + "loss": 0.2780989408493042, + "step": 1636 + }, + { + "epoch": 0.8090942790065488, + "grad_norm": 1.1023331343203706, + "learning_rate": 1.389372732886777e-05, + "loss": 0.31049463152885437, + "step": 1637 + }, + { + "epoch": 0.809588533300383, + "grad_norm": 1.0442513462466116, + "learning_rate": 1.3886200258801213e-05, + "loss": 0.29925107955932617, + "step": 1638 + }, + { + "epoch": 0.8100827875942173, + "grad_norm": 1.1039524368767084, + "learning_rate": 1.3878670594362893e-05, + "loss": 0.31893983483314514, + "step": 1639 + }, + { + "epoch": 0.8105770418880514, + "grad_norm": 1.1529184850949745, + "learning_rate": 1.3871138340579502e-05, + "loss": 0.31307080388069153, + "step": 1640 + }, + { + "epoch": 0.8110712961818856, + "grad_norm": 1.068880489111062, + "learning_rate": 1.3863603502479465e-05, + "loss": 0.28198909759521484, + "step": 1641 + }, + { + "epoch": 0.8115655504757198, + "grad_norm": 1.085975037148026, + "learning_rate": 1.3856066085092936e-05, + "loss": 0.28937461972236633, + "step": 1642 + }, + { + "epoch": 0.812059804769554, + "grad_norm": 1.2145338549731968, + "learning_rate": 1.3848526093451789e-05, + "loss": 0.32332292199134827, + "step": 1643 + }, + { + "epoch": 0.8125540590633881, + "grad_norm": 1.1174132019487801, + "learning_rate": 1.3840983532589606e-05, + "loss": 0.3059847056865692, + "step": 1644 + }, 
+ { + "epoch": 0.8130483133572223, + "grad_norm": 1.0961695985122493, + "learning_rate": 1.3833438407541698e-05, + "loss": 0.2939583957195282, + "step": 1645 + }, + { + "epoch": 0.8135425676510565, + "grad_norm": 1.098340825845408, + "learning_rate": 1.3825890723345082e-05, + "loss": 0.3293933868408203, + "step": 1646 + }, + { + "epoch": 0.8140368219448907, + "grad_norm": 1.230371641918686, + "learning_rate": 1.3818340485038488e-05, + "loss": 0.33373600244522095, + "step": 1647 + }, + { + "epoch": 0.8145310762387248, + "grad_norm": 1.0438422749127716, + "learning_rate": 1.3810787697662337e-05, + "loss": 0.2716716527938843, + "step": 1648 + }, + { + "epoch": 0.815025330532559, + "grad_norm": 1.0698510483790238, + "learning_rate": 1.3803232366258774e-05, + "loss": 0.26109835505485535, + "step": 1649 + }, + { + "epoch": 0.8155195848263932, + "grad_norm": 1.1399890180248013, + "learning_rate": 1.3795674495871627e-05, + "loss": 0.3161536753177643, + "step": 1650 + }, + { + "epoch": 0.8160138391202274, + "grad_norm": 1.0602857079391073, + "learning_rate": 1.3788114091546414e-05, + "loss": 0.3078432083129883, + "step": 1651 + }, + { + "epoch": 0.8165080934140615, + "grad_norm": 1.2174185154702881, + "learning_rate": 1.3780551158330364e-05, + "loss": 0.31023627519607544, + "step": 1652 + }, + { + "epoch": 0.8170023477078957, + "grad_norm": 1.2616374354619766, + "learning_rate": 1.3772985701272374e-05, + "loss": 0.3438849151134491, + "step": 1653 + }, + { + "epoch": 0.8174966020017299, + "grad_norm": 1.178467386446937, + "learning_rate": 1.376541772542304e-05, + "loss": 0.31897789239883423, + "step": 1654 + }, + { + "epoch": 0.8179908562955641, + "grad_norm": 1.1125307511503921, + "learning_rate": 1.3757847235834636e-05, + "loss": 0.3101171553134918, + "step": 1655 + }, + { + "epoch": 0.8184851105893982, + "grad_norm": 1.0084998738545823, + "learning_rate": 1.375027423756111e-05, + "loss": 0.28926995396614075, + "step": 1656 + }, + { + "epoch": 0.8189793648832324, + 
"grad_norm": 1.0711136459333532, + "learning_rate": 1.3742698735658087e-05, + "loss": 0.322610080242157, + "step": 1657 + }, + { + "epoch": 0.8194736191770666, + "grad_norm": 1.272869487692088, + "learning_rate": 1.3735120735182865e-05, + "loss": 0.27430039644241333, + "step": 1658 + }, + { + "epoch": 0.8199678734709008, + "grad_norm": 1.217768709412782, + "learning_rate": 1.3727540241194408e-05, + "loss": 0.3091571629047394, + "step": 1659 + }, + { + "epoch": 0.8204621277647349, + "grad_norm": 1.1956758026057746, + "learning_rate": 1.3719957258753347e-05, + "loss": 0.3039378523826599, + "step": 1660 + }, + { + "epoch": 0.8209563820585691, + "grad_norm": 1.0829759157920493, + "learning_rate": 1.371237179292197e-05, + "loss": 0.29711851477622986, + "step": 1661 + }, + { + "epoch": 0.8214506363524033, + "grad_norm": 1.238383962420106, + "learning_rate": 1.370478384876423e-05, + "loss": 0.32411956787109375, + "step": 1662 + }, + { + "epoch": 0.8219448906462374, + "grad_norm": 1.124344248809279, + "learning_rate": 1.3697193431345725e-05, + "loss": 0.2981719672679901, + "step": 1663 + }, + { + "epoch": 0.8224391449400716, + "grad_norm": 1.1755056696925432, + "learning_rate": 1.3689600545733713e-05, + "loss": 0.32756730914115906, + "step": 1664 + }, + { + "epoch": 0.8229333992339058, + "grad_norm": 1.2289352827455349, + "learning_rate": 1.3682005196997094e-05, + "loss": 0.3910979628562927, + "step": 1665 + }, + { + "epoch": 0.82342765352774, + "grad_norm": 1.1421419999727185, + "learning_rate": 1.3674407390206417e-05, + "loss": 0.31716856360435486, + "step": 1666 + }, + { + "epoch": 0.8239219078215742, + "grad_norm": 1.087053372594853, + "learning_rate": 1.3666807130433865e-05, + "loss": 0.31816208362579346, + "step": 1667 + }, + { + "epoch": 0.8244161621154084, + "grad_norm": 1.075967107077939, + "learning_rate": 1.3659204422753265e-05, + "loss": 0.3008955121040344, + "step": 1668 + }, + { + "epoch": 0.8249104164092426, + "grad_norm": 1.0356290376868373, + 
"learning_rate": 1.3651599272240078e-05, + "loss": 0.2957409918308258, + "step": 1669 + }, + { + "epoch": 0.8254046707030768, + "grad_norm": 1.0989692049502364, + "learning_rate": 1.364399168397139e-05, + "loss": 0.33019471168518066, + "step": 1670 + }, + { + "epoch": 0.8258989249969109, + "grad_norm": 1.1429937641963879, + "learning_rate": 1.3636381663025917e-05, + "loss": 0.3532376289367676, + "step": 1671 + }, + { + "epoch": 0.8263931792907451, + "grad_norm": 1.1063068636148639, + "learning_rate": 1.362876921448401e-05, + "loss": 0.2980180084705353, + "step": 1672 + }, + { + "epoch": 0.8268874335845793, + "grad_norm": 1.0418269417044947, + "learning_rate": 1.362115434342762e-05, + "loss": 0.27932479977607727, + "step": 1673 + }, + { + "epoch": 0.8273816878784135, + "grad_norm": 1.1782768860255097, + "learning_rate": 1.3613537054940331e-05, + "loss": 0.2783966064453125, + "step": 1674 + }, + { + "epoch": 0.8278759421722476, + "grad_norm": 1.1803795842967677, + "learning_rate": 1.3605917354107336e-05, + "loss": 0.2957308888435364, + "step": 1675 + }, + { + "epoch": 0.8283701964660818, + "grad_norm": 1.180747940998609, + "learning_rate": 1.3598295246015439e-05, + "loss": 0.31640201807022095, + "step": 1676 + }, + { + "epoch": 0.828864450759916, + "grad_norm": 1.0988354367735653, + "learning_rate": 1.3590670735753047e-05, + "loss": 0.2969709634780884, + "step": 1677 + }, + { + "epoch": 0.8293587050537502, + "grad_norm": 1.1164468460017938, + "learning_rate": 1.3583043828410177e-05, + "loss": 0.34167301654815674, + "step": 1678 + }, + { + "epoch": 0.8298529593475843, + "grad_norm": 1.0956930352290435, + "learning_rate": 1.3575414529078443e-05, + "loss": 0.28540804982185364, + "step": 1679 + }, + { + "epoch": 0.8303472136414185, + "grad_norm": 1.1795719749617215, + "learning_rate": 1.3567782842851054e-05, + "loss": 0.2962091565132141, + "step": 1680 + }, + { + "epoch": 0.8308414679352527, + "grad_norm": 1.1969039130243166, + "learning_rate": 1.3560148774822816e-05, + 
"loss": 0.3650284707546234, + "step": 1681 + }, + { + "epoch": 0.8313357222290869, + "grad_norm": 1.1374534594887609, + "learning_rate": 1.3552512330090126e-05, + "loss": 0.3134267330169678, + "step": 1682 + }, + { + "epoch": 0.831829976522921, + "grad_norm": 1.0921894303145987, + "learning_rate": 1.3544873513750967e-05, + "loss": 0.3020439147949219, + "step": 1683 + }, + { + "epoch": 0.8323242308167552, + "grad_norm": 0.9393581319245673, + "learning_rate": 1.3537232330904895e-05, + "loss": 0.25083282589912415, + "step": 1684 + }, + { + "epoch": 0.8328184851105894, + "grad_norm": 1.2024147558027563, + "learning_rate": 1.3529588786653063e-05, + "loss": 0.33875352144241333, + "step": 1685 + }, + { + "epoch": 0.8333127394044236, + "grad_norm": 1.0620839528979684, + "learning_rate": 1.3521942886098186e-05, + "loss": 0.2717735171318054, + "step": 1686 + }, + { + "epoch": 0.8338069936982577, + "grad_norm": 1.1255995988400895, + "learning_rate": 1.3514294634344562e-05, + "loss": 0.271842896938324, + "step": 1687 + }, + { + "epoch": 0.8343012479920919, + "grad_norm": 1.3262220000473801, + "learning_rate": 1.3506644036498054e-05, + "loss": 0.29420506954193115, + "step": 1688 + }, + { + "epoch": 0.8347955022859261, + "grad_norm": 1.338127401529371, + "learning_rate": 1.349899109766609e-05, + "loss": 0.3336431682109833, + "step": 1689 + }, + { + "epoch": 0.8352897565797602, + "grad_norm": 1.0514224360912943, + "learning_rate": 1.3491335822957665e-05, + "loss": 0.2848295569419861, + "step": 1690 + }, + { + "epoch": 0.8357840108735944, + "grad_norm": 1.1721842125626762, + "learning_rate": 1.3483678217483327e-05, + "loss": 0.3164542019367218, + "step": 1691 + }, + { + "epoch": 0.8362782651674286, + "grad_norm": 1.1542823329984544, + "learning_rate": 1.3476018286355189e-05, + "loss": 0.3030688762664795, + "step": 1692 + }, + { + "epoch": 0.8367725194612629, + "grad_norm": 1.3329503320081877, + "learning_rate": 1.3468356034686912e-05, + "loss": 0.30218198895454407, + "step": 1693 
+ }, + { + "epoch": 0.837266773755097, + "grad_norm": 1.1429497129560076, + "learning_rate": 1.3460691467593697e-05, + "loss": 0.3327499032020569, + "step": 1694 + }, + { + "epoch": 0.8377610280489312, + "grad_norm": 1.2198627663252626, + "learning_rate": 1.3453024590192307e-05, + "loss": 0.29298892617225647, + "step": 1695 + }, + { + "epoch": 0.8382552823427654, + "grad_norm": 1.238368209416205, + "learning_rate": 1.344535540760104e-05, + "loss": 0.3096858859062195, + "step": 1696 + }, + { + "epoch": 0.8387495366365996, + "grad_norm": 1.1297510733547198, + "learning_rate": 1.3437683924939731e-05, + "loss": 0.30680233240127563, + "step": 1697 + }, + { + "epoch": 0.8392437909304337, + "grad_norm": 1.13902422944666, + "learning_rate": 1.3430010147329752e-05, + "loss": 0.3139989972114563, + "step": 1698 + }, + { + "epoch": 0.8397380452242679, + "grad_norm": 1.132396621648215, + "learning_rate": 1.3422334079894008e-05, + "loss": 0.30418652296066284, + "step": 1699 + }, + { + "epoch": 0.8402322995181021, + "grad_norm": 1.228592620621731, + "learning_rate": 1.3414655727756931e-05, + "loss": 0.31245100498199463, + "step": 1700 + }, + { + "epoch": 0.8407265538119363, + "grad_norm": 1.1908375195801162, + "learning_rate": 1.3406975096044477e-05, + "loss": 0.3381880223751068, + "step": 1701 + }, + { + "epoch": 0.8412208081057704, + "grad_norm": 1.2009611203254438, + "learning_rate": 1.3399292189884135e-05, + "loss": 0.3359968960285187, + "step": 1702 + }, + { + "epoch": 0.8417150623996046, + "grad_norm": 1.2698512981575327, + "learning_rate": 1.3391607014404891e-05, + "loss": 0.3320350646972656, + "step": 1703 + }, + { + "epoch": 0.8422093166934388, + "grad_norm": 1.1615181813433448, + "learning_rate": 1.3383919574737267e-05, + "loss": 0.32830795645713806, + "step": 1704 + }, + { + "epoch": 0.842703570987273, + "grad_norm": 0.9808785682252426, + "learning_rate": 1.3376229876013285e-05, + "loss": 0.255840927362442, + "step": 1705 + }, + { + "epoch": 0.8431978252811071, + 
"grad_norm": 1.0739012833500008, + "learning_rate": 1.3368537923366476e-05, + "loss": 0.3110755681991577, + "step": 1706 + }, + { + "epoch": 0.8436920795749413, + "grad_norm": 1.0815136095330147, + "learning_rate": 1.336084372193188e-05, + "loss": 0.28063881397247314, + "step": 1707 + }, + { + "epoch": 0.8441863338687755, + "grad_norm": 1.1539434345644544, + "learning_rate": 1.3353147276846042e-05, + "loss": 0.31297358870506287, + "step": 1708 + }, + { + "epoch": 0.8446805881626097, + "grad_norm": 1.155638509555895, + "learning_rate": 1.3345448593246986e-05, + "loss": 0.30750149488449097, + "step": 1709 + }, + { + "epoch": 0.8451748424564438, + "grad_norm": 1.0259778822912606, + "learning_rate": 1.333774767627425e-05, + "loss": 0.2665224075317383, + "step": 1710 + }, + { + "epoch": 0.845669096750278, + "grad_norm": 1.0618832452009934, + "learning_rate": 1.3330044531068858e-05, + "loss": 0.28920280933380127, + "step": 1711 + }, + { + "epoch": 0.8461633510441122, + "grad_norm": 1.0688762844449171, + "learning_rate": 1.332233916277332e-05, + "loss": 0.2678643465042114, + "step": 1712 + }, + { + "epoch": 0.8466576053379464, + "grad_norm": 1.1389370638959122, + "learning_rate": 1.3314631576531623e-05, + "loss": 0.33682242035865784, + "step": 1713 + }, + { + "epoch": 0.8471518596317805, + "grad_norm": 1.2088936099945806, + "learning_rate": 1.330692177748925e-05, + "loss": 0.36704546213150024, + "step": 1714 + }, + { + "epoch": 0.8476461139256147, + "grad_norm": 1.0972613113130176, + "learning_rate": 1.3299209770793144e-05, + "loss": 0.3183630108833313, + "step": 1715 + }, + { + "epoch": 0.8481403682194489, + "grad_norm": 1.0799352919589156, + "learning_rate": 1.3291495561591736e-05, + "loss": 0.27138596773147583, + "step": 1716 + }, + { + "epoch": 0.848634622513283, + "grad_norm": 1.1332588592044, + "learning_rate": 1.3283779155034925e-05, + "loss": 0.30252328515052795, + "step": 1717 + }, + { + "epoch": 0.8491288768071172, + "grad_norm": 1.1212549613542353, + 
"learning_rate": 1.3276060556274067e-05, + "loss": 0.29494598507881165, + "step": 1718 + }, + { + "epoch": 0.8496231311009514, + "grad_norm": 1.01148770717553, + "learning_rate": 1.3268339770461988e-05, + "loss": 0.2822422981262207, + "step": 1719 + }, + { + "epoch": 0.8501173853947857, + "grad_norm": 1.134036508201843, + "learning_rate": 1.3260616802752979e-05, + "loss": 0.3348005712032318, + "step": 1720 + }, + { + "epoch": 0.8506116396886197, + "grad_norm": 1.171053745899539, + "learning_rate": 1.3252891658302782e-05, + "loss": 0.3146229088306427, + "step": 1721 + }, + { + "epoch": 0.851105893982454, + "grad_norm": 1.1635384669674214, + "learning_rate": 1.3245164342268592e-05, + "loss": 0.34189414978027344, + "step": 1722 + }, + { + "epoch": 0.8516001482762882, + "grad_norm": 1.0403207041973201, + "learning_rate": 1.3237434859809055e-05, + "loss": 0.2967323958873749, + "step": 1723 + }, + { + "epoch": 0.8520944025701224, + "grad_norm": 1.1011411329678815, + "learning_rate": 1.3229703216084262e-05, + "loss": 0.329689085483551, + "step": 1724 + }, + { + "epoch": 0.8525886568639565, + "grad_norm": 1.1910259713127598, + "learning_rate": 1.3221969416255751e-05, + "loss": 0.33041107654571533, + "step": 1725 + }, + { + "epoch": 0.8530829111577907, + "grad_norm": 1.144468406694428, + "learning_rate": 1.321423346548649e-05, + "loss": 0.30197203159332275, + "step": 1726 + }, + { + "epoch": 0.8535771654516249, + "grad_norm": 1.1709857904248526, + "learning_rate": 1.3206495368940897e-05, + "loss": 0.29060906171798706, + "step": 1727 + }, + { + "epoch": 0.8540714197454591, + "grad_norm": 1.1769143322358042, + "learning_rate": 1.3198755131784808e-05, + "loss": 0.3119436502456665, + "step": 1728 + }, + { + "epoch": 0.8545656740392932, + "grad_norm": 1.1825299188260439, + "learning_rate": 1.31910127591855e-05, + "loss": 0.35256415605545044, + "step": 1729 + }, + { + "epoch": 0.8550599283331274, + "grad_norm": 1.169751710502227, + "learning_rate": 1.3183268256311665e-05, + 
"loss": 0.3093785345554352, + "step": 1730 + }, + { + "epoch": 0.8555541826269616, + "grad_norm": 1.0555303314758304, + "learning_rate": 1.317552162833343e-05, + "loss": 0.2713086009025574, + "step": 1731 + }, + { + "epoch": 0.8560484369207958, + "grad_norm": 1.1667835049569328, + "learning_rate": 1.3167772880422325e-05, + "loss": 0.3135699927806854, + "step": 1732 + }, + { + "epoch": 0.8565426912146299, + "grad_norm": 1.2127716623193672, + "learning_rate": 1.3160022017751308e-05, + "loss": 0.3077283501625061, + "step": 1733 + }, + { + "epoch": 0.8570369455084641, + "grad_norm": 1.0914461784602205, + "learning_rate": 1.3152269045494744e-05, + "loss": 0.2900918424129486, + "step": 1734 + }, + { + "epoch": 0.8575311998022983, + "grad_norm": 1.1010374385853228, + "learning_rate": 1.3144513968828406e-05, + "loss": 0.30828869342803955, + "step": 1735 + }, + { + "epoch": 0.8580254540961325, + "grad_norm": 1.2038482894608615, + "learning_rate": 1.3136756792929469e-05, + "loss": 0.32526400685310364, + "step": 1736 + }, + { + "epoch": 0.8585197083899666, + "grad_norm": 1.2033734524328428, + "learning_rate": 1.3128997522976518e-05, + "loss": 0.35023608803749084, + "step": 1737 + }, + { + "epoch": 0.8590139626838008, + "grad_norm": 1.0100870731750684, + "learning_rate": 1.312123616414953e-05, + "loss": 0.27287641167640686, + "step": 1738 + }, + { + "epoch": 0.859508216977635, + "grad_norm": 1.1797907328737691, + "learning_rate": 1.3113472721629871e-05, + "loss": 0.346009761095047, + "step": 1739 + }, + { + "epoch": 0.8600024712714691, + "grad_norm": 1.0724791595798373, + "learning_rate": 1.3105707200600312e-05, + "loss": 0.3297504186630249, + "step": 1740 + }, + { + "epoch": 0.8604967255653033, + "grad_norm": 1.1244989642514696, + "learning_rate": 1.3097939606245005e-05, + "loss": 0.29835087060928345, + "step": 1741 + }, + { + "epoch": 0.8609909798591375, + "grad_norm": 1.1715549927893771, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.31466037034988403, + "step": 
1742 + }, + { + "epoch": 0.8614852341529717, + "grad_norm": 1.1357283105102867, + "learning_rate": 1.3082398218300646e-05, + "loss": 0.32722294330596924, + "step": 1743 + }, + { + "epoch": 0.8619794884468058, + "grad_norm": 1.0679826147860039, + "learning_rate": 1.3074624435086809e-05, + "loss": 0.2603963613510132, + "step": 1744 + }, + { + "epoch": 0.86247374274064, + "grad_norm": 1.3274641459704042, + "learning_rate": 1.3066848599297633e-05, + "loss": 0.3100607991218567, + "step": 1745 + }, + { + "epoch": 0.8629679970344742, + "grad_norm": 1.0941880035602098, + "learning_rate": 1.3059070716124145e-05, + "loss": 0.2772334814071655, + "step": 1746 + }, + { + "epoch": 0.8634622513283084, + "grad_norm": 1.0597299652706509, + "learning_rate": 1.305129079075876e-05, + "loss": 0.3097267746925354, + "step": 1747 + }, + { + "epoch": 0.8639565056221425, + "grad_norm": 0.9828148026871187, + "learning_rate": 1.304350882839524e-05, + "loss": 0.24734097719192505, + "step": 1748 + }, + { + "epoch": 0.8644507599159768, + "grad_norm": 1.1998311484351691, + "learning_rate": 1.3035724834228713e-05, + "loss": 0.32148587703704834, + "step": 1749 + }, + { + "epoch": 0.864945014209811, + "grad_norm": 1.0741747588028856, + "learning_rate": 1.3027938813455663e-05, + "loss": 0.3037404417991638, + "step": 1750 + }, + { + "epoch": 0.8654392685036452, + "grad_norm": 1.011225470292042, + "learning_rate": 1.3020150771273925e-05, + "loss": 0.30760154128074646, + "step": 1751 + }, + { + "epoch": 0.8659335227974793, + "grad_norm": 1.2184140511087935, + "learning_rate": 1.3012360712882681e-05, + "loss": 0.3169519305229187, + "step": 1752 + }, + { + "epoch": 0.8664277770913135, + "grad_norm": 1.1637013896789081, + "learning_rate": 1.300456864348247e-05, + "loss": 0.32497861981391907, + "step": 1753 + }, + { + "epoch": 0.8669220313851477, + "grad_norm": 1.1775374726585146, + "learning_rate": 1.2996774568275163e-05, + "loss": 0.3318047821521759, + "step": 1754 + }, + { + "epoch": 0.8674162856789819, 
+ "grad_norm": 1.1616476146855954, + "learning_rate": 1.298897849246397e-05, + "loss": 0.32553863525390625, + "step": 1755 + }, + { + "epoch": 0.867910539972816, + "grad_norm": 1.2503555115463478, + "learning_rate": 1.2981180421253446e-05, + "loss": 0.36457520723342896, + "step": 1756 + }, + { + "epoch": 0.8684047942666502, + "grad_norm": 1.169094604095011, + "learning_rate": 1.2973380359849466e-05, + "loss": 0.3038361668586731, + "step": 1757 + }, + { + "epoch": 0.8688990485604844, + "grad_norm": 1.1027242700855706, + "learning_rate": 1.2965578313459246e-05, + "loss": 0.3219846785068512, + "step": 1758 + }, + { + "epoch": 0.8693933028543186, + "grad_norm": 1.1142620036406827, + "learning_rate": 1.2957774287291311e-05, + "loss": 0.3180781304836273, + "step": 1759 + }, + { + "epoch": 0.8698875571481527, + "grad_norm": 1.0882143758359024, + "learning_rate": 1.2949968286555527e-05, + "loss": 0.27302947640419006, + "step": 1760 + }, + { + "epoch": 0.8703818114419869, + "grad_norm": 1.1731384509972838, + "learning_rate": 1.2942160316463066e-05, + "loss": 0.31756314635276794, + "step": 1761 + }, + { + "epoch": 0.8708760657358211, + "grad_norm": 1.1248252560155063, + "learning_rate": 1.2934350382226412e-05, + "loss": 0.2921680510044098, + "step": 1762 + }, + { + "epoch": 0.8713703200296553, + "grad_norm": 1.2402414216219324, + "learning_rate": 1.2926538489059373e-05, + "loss": 0.36426985263824463, + "step": 1763 + }, + { + "epoch": 0.8718645743234894, + "grad_norm": 1.1314972333554063, + "learning_rate": 1.2918724642177054e-05, + "loss": 0.31873831152915955, + "step": 1764 + }, + { + "epoch": 0.8723588286173236, + "grad_norm": 1.0875738556359984, + "learning_rate": 1.2910908846795867e-05, + "loss": 0.30952733755111694, + "step": 1765 + }, + { + "epoch": 0.8728530829111578, + "grad_norm": 1.149047421683754, + "learning_rate": 1.2903091108133523e-05, + "loss": 0.33339035511016846, + "step": 1766 + }, + { + "epoch": 0.8733473372049919, + "grad_norm": 1.1272534729456736, + 
"learning_rate": 1.2895271431409038e-05, + "loss": 0.31531351804733276, + "step": 1767 + }, + { + "epoch": 0.8738415914988261, + "grad_norm": 1.0832848102714157, + "learning_rate": 1.2887449821842713e-05, + "loss": 0.3016526401042938, + "step": 1768 + }, + { + "epoch": 0.8743358457926603, + "grad_norm": 1.1203275148695215, + "learning_rate": 1.2879626284656141e-05, + "loss": 0.3364630341529846, + "step": 1769 + }, + { + "epoch": 0.8748301000864945, + "grad_norm": 1.049317791331816, + "learning_rate": 1.287180082507221e-05, + "loss": 0.29755398631095886, + "step": 1770 + }, + { + "epoch": 0.8753243543803286, + "grad_norm": 1.1616312337400467, + "learning_rate": 1.286397344831508e-05, + "loss": 0.2986103892326355, + "step": 1771 + }, + { + "epoch": 0.8758186086741628, + "grad_norm": 1.0876002245947722, + "learning_rate": 1.2856144159610197e-05, + "loss": 0.31291434168815613, + "step": 1772 + }, + { + "epoch": 0.876312862967997, + "grad_norm": 1.0995747953439883, + "learning_rate": 1.2848312964184283e-05, + "loss": 0.28285568952560425, + "step": 1773 + }, + { + "epoch": 0.8768071172618312, + "grad_norm": 1.1726581514839194, + "learning_rate": 1.2840479867265331e-05, + "loss": 0.3319891095161438, + "step": 1774 + }, + { + "epoch": 0.8773013715556653, + "grad_norm": 1.1459648615093148, + "learning_rate": 1.2832644874082604e-05, + "loss": 0.3265117406845093, + "step": 1775 + }, + { + "epoch": 0.8777956258494996, + "grad_norm": 1.1247446577558389, + "learning_rate": 1.2824807989866635e-05, + "loss": 0.32061511278152466, + "step": 1776 + }, + { + "epoch": 0.8782898801433338, + "grad_norm": 1.2889890707472673, + "learning_rate": 1.2816969219849214e-05, + "loss": 0.34278666973114014, + "step": 1777 + }, + { + "epoch": 0.878784134437168, + "grad_norm": 1.0560963427574246, + "learning_rate": 1.2809128569263387e-05, + "loss": 0.28335195779800415, + "step": 1778 + }, + { + "epoch": 0.8792783887310021, + "grad_norm": 1.261751311219404, + "learning_rate": 1.2801286043343468e-05, + 
"loss": 0.35037046670913696, + "step": 1779 + }, + { + "epoch": 0.8797726430248363, + "grad_norm": 1.2110241423671546, + "learning_rate": 1.2793441647325012e-05, + "loss": 0.30058878660202026, + "step": 1780 + }, + { + "epoch": 0.8802668973186705, + "grad_norm": 1.1234244113929972, + "learning_rate": 1.2785595386444824e-05, + "loss": 0.29526466131210327, + "step": 1781 + }, + { + "epoch": 0.8807611516125047, + "grad_norm": 1.128737037655087, + "learning_rate": 1.2777747265940956e-05, + "loss": 0.3194332718849182, + "step": 1782 + }, + { + "epoch": 0.8812554059063388, + "grad_norm": 1.1751557862784823, + "learning_rate": 1.2769897291052709e-05, + "loss": 0.33527326583862305, + "step": 1783 + }, + { + "epoch": 0.881749660200173, + "grad_norm": 1.1396387575620477, + "learning_rate": 1.2762045467020601e-05, + "loss": 0.3277815580368042, + "step": 1784 + }, + { + "epoch": 0.8822439144940072, + "grad_norm": 1.2472488401817894, + "learning_rate": 1.2754191799086406e-05, + "loss": 0.31030380725860596, + "step": 1785 + }, + { + "epoch": 0.8827381687878414, + "grad_norm": 1.2316289072611675, + "learning_rate": 1.274633629249312e-05, + "loss": 0.34496408700942993, + "step": 1786 + }, + { + "epoch": 0.8832324230816755, + "grad_norm": 1.1907817971144352, + "learning_rate": 1.2738478952484964e-05, + "loss": 0.31008201837539673, + "step": 1787 + }, + { + "epoch": 0.8837266773755097, + "grad_norm": 1.1874449425538405, + "learning_rate": 1.2730619784307388e-05, + "loss": 0.35956043004989624, + "step": 1788 + }, + { + "epoch": 0.8842209316693439, + "grad_norm": 1.075617061625236, + "learning_rate": 1.272275879320706e-05, + "loss": 0.2944573760032654, + "step": 1789 + }, + { + "epoch": 0.8847151859631781, + "grad_norm": 1.0739187183942678, + "learning_rate": 1.2714895984431863e-05, + "loss": 0.2941366136074066, + "step": 1790 + }, + { + "epoch": 0.8852094402570122, + "grad_norm": 1.3130838842625934, + "learning_rate": 1.2707031363230901e-05, + "loss": 0.34683144092559814, + "step": 
1791 + }, + { + "epoch": 0.8857036945508464, + "grad_norm": 1.1309361657268096, + "learning_rate": 1.2699164934854475e-05, + "loss": 0.3014514744281769, + "step": 1792 + }, + { + "epoch": 0.8861979488446806, + "grad_norm": 1.1642635843186193, + "learning_rate": 1.2691296704554112e-05, + "loss": 0.2749955654144287, + "step": 1793 + }, + { + "epoch": 0.8866922031385147, + "grad_norm": 1.2406193113190336, + "learning_rate": 1.2683426677582518e-05, + "loss": 0.3707960844039917, + "step": 1794 + }, + { + "epoch": 0.8871864574323489, + "grad_norm": 1.098057655891237, + "learning_rate": 1.2675554859193615e-05, + "loss": 0.3122541606426239, + "step": 1795 + }, + { + "epoch": 0.8876807117261831, + "grad_norm": 1.1564617646628, + "learning_rate": 1.2667681254642521e-05, + "loss": 0.3072753846645355, + "step": 1796 + }, + { + "epoch": 0.8881749660200173, + "grad_norm": 1.1000251936377918, + "learning_rate": 1.2659805869185534e-05, + "loss": 0.27002331614494324, + "step": 1797 + }, + { + "epoch": 0.8886692203138514, + "grad_norm": 1.0649961261949041, + "learning_rate": 1.2651928708080155e-05, + "loss": 0.2775167226791382, + "step": 1798 + }, + { + "epoch": 0.8891634746076856, + "grad_norm": 1.0134446617324497, + "learning_rate": 1.2644049776585061e-05, + "loss": 0.30023425817489624, + "step": 1799 + }, + { + "epoch": 0.8896577289015198, + "grad_norm": 1.0536326288458973, + "learning_rate": 1.2636169079960116e-05, + "loss": 0.29491451382637024, + "step": 1800 + }, + { + "epoch": 0.890151983195354, + "grad_norm": 1.1393442237009457, + "learning_rate": 1.2628286623466359e-05, + "loss": 0.3069722652435303, + "step": 1801 + }, + { + "epoch": 0.8906462374891881, + "grad_norm": 1.0432479678380786, + "learning_rate": 1.2620402412366006e-05, + "loss": 0.30594444274902344, + "step": 1802 + }, + { + "epoch": 0.8911404917830223, + "grad_norm": 1.224543789313884, + "learning_rate": 1.2612516451922442e-05, + "loss": 0.278346985578537, + "step": 1803 + }, + { + "epoch": 0.8916347460768566, + 
"grad_norm": 1.2157715105375322, + "learning_rate": 1.2604628747400227e-05, + "loss": 0.2985970973968506, + "step": 1804 + }, + { + "epoch": 0.8921290003706908, + "grad_norm": 1.1226040901686842, + "learning_rate": 1.259673930406507e-05, + "loss": 0.31054627895355225, + "step": 1805 + }, + { + "epoch": 0.8926232546645249, + "grad_norm": 1.1289150487077437, + "learning_rate": 1.258884812718386e-05, + "loss": 0.28903907537460327, + "step": 1806 + }, + { + "epoch": 0.8931175089583591, + "grad_norm": 1.132448586420589, + "learning_rate": 1.258095522202463e-05, + "loss": 0.2937915027141571, + "step": 1807 + }, + { + "epoch": 0.8936117632521933, + "grad_norm": 1.251676196860064, + "learning_rate": 1.257306059385657e-05, + "loss": 0.3038950562477112, + "step": 1808 + }, + { + "epoch": 0.8941060175460275, + "grad_norm": 1.235431629529867, + "learning_rate": 1.2565164247950023e-05, + "loss": 0.3081057071685791, + "step": 1809 + }, + { + "epoch": 0.8946002718398616, + "grad_norm": 1.1023391071403523, + "learning_rate": 1.2557266189576478e-05, + "loss": 0.2608702480792999, + "step": 1810 + }, + { + "epoch": 0.8950945261336958, + "grad_norm": 1.1733196648921136, + "learning_rate": 1.254936642400856e-05, + "loss": 0.2829548120498657, + "step": 1811 + }, + { + "epoch": 0.89558878042753, + "grad_norm": 1.1986500261226571, + "learning_rate": 1.2541464956520045e-05, + "loss": 0.3157985508441925, + "step": 1812 + }, + { + "epoch": 0.8960830347213642, + "grad_norm": 1.1805347109907394, + "learning_rate": 1.2533561792385837e-05, + "loss": 0.2948974370956421, + "step": 1813 + }, + { + "epoch": 0.8965772890151983, + "grad_norm": 1.1460952132203441, + "learning_rate": 1.252565693688198e-05, + "loss": 0.3011903166770935, + "step": 1814 + }, + { + "epoch": 0.8970715433090325, + "grad_norm": 1.3055245186221631, + "learning_rate": 1.2517750395285635e-05, + "loss": 0.3570353388786316, + "step": 1815 + }, + { + "epoch": 0.8975657976028667, + "grad_norm": 1.1337741379781219, + "learning_rate": 
1.2509842172875105e-05, + "loss": 0.30166712403297424, + "step": 1816 + }, + { + "epoch": 0.8980600518967009, + "grad_norm": 1.104423129790351, + "learning_rate": 1.2501932274929797e-05, + "loss": 0.3260636329650879, + "step": 1817 + }, + { + "epoch": 0.898554306190535, + "grad_norm": 1.0975906386988825, + "learning_rate": 1.2494020706730251e-05, + "loss": 0.31647035479545593, + "step": 1818 + }, + { + "epoch": 0.8990485604843692, + "grad_norm": 1.2099925292750648, + "learning_rate": 1.2486107473558118e-05, + "loss": 0.3059273064136505, + "step": 1819 + }, + { + "epoch": 0.8995428147782034, + "grad_norm": 1.1238527206258473, + "learning_rate": 1.247819258069616e-05, + "loss": 0.31050577759742737, + "step": 1820 + }, + { + "epoch": 0.9000370690720375, + "grad_norm": 1.167261413544568, + "learning_rate": 1.2470276033428241e-05, + "loss": 0.3199779689311981, + "step": 1821 + }, + { + "epoch": 0.9005313233658717, + "grad_norm": 1.1634621252313533, + "learning_rate": 1.2462357837039338e-05, + "loss": 0.31346091628074646, + "step": 1822 + }, + { + "epoch": 0.9010255776597059, + "grad_norm": 1.7712393639688087, + "learning_rate": 1.245443799681553e-05, + "loss": 0.31128326058387756, + "step": 1823 + }, + { + "epoch": 0.9015198319535401, + "grad_norm": 1.0665988205220116, + "learning_rate": 1.244651651804398e-05, + "loss": 0.27540329098701477, + "step": 1824 + }, + { + "epoch": 0.9020140862473742, + "grad_norm": 1.08908725997666, + "learning_rate": 1.243859340601296e-05, + "loss": 0.2613363265991211, + "step": 1825 + }, + { + "epoch": 0.9025083405412084, + "grad_norm": 1.1499718586586674, + "learning_rate": 1.2430668666011825e-05, + "loss": 0.30530184507369995, + "step": 1826 + }, + { + "epoch": 0.9030025948350426, + "grad_norm": 1.0907140946424856, + "learning_rate": 1.2422742303331022e-05, + "loss": 0.3223349153995514, + "step": 1827 + }, + { + "epoch": 0.9034968491288768, + "grad_norm": 1.131086049145241, + "learning_rate": 1.2414814323262067e-05, + "loss": 
0.32017287611961365, + "step": 1828 + }, + { + "epoch": 0.9039911034227109, + "grad_norm": 1.2183101338845472, + "learning_rate": 1.2406884731097582e-05, + "loss": 0.2965891361236572, + "step": 1829 + }, + { + "epoch": 0.9044853577165451, + "grad_norm": 1.535326476461108, + "learning_rate": 1.2398953532131235e-05, + "loss": 0.3517727851867676, + "step": 1830 + }, + { + "epoch": 0.9049796120103794, + "grad_norm": 1.0055415215772612, + "learning_rate": 1.2391020731657788e-05, + "loss": 0.26107311248779297, + "step": 1831 + }, + { + "epoch": 0.9054738663042136, + "grad_norm": 1.16405975535122, + "learning_rate": 1.2383086334973065e-05, + "loss": 0.31327998638153076, + "step": 1832 + }, + { + "epoch": 0.9059681205980477, + "grad_norm": 1.1376729658041929, + "learning_rate": 1.2375150347373956e-05, + "loss": 0.2708127498626709, + "step": 1833 + }, + { + "epoch": 0.9064623748918819, + "grad_norm": 1.2578266997569258, + "learning_rate": 1.236721277415841e-05, + "loss": 0.3264025151729584, + "step": 1834 + }, + { + "epoch": 0.9069566291857161, + "grad_norm": 1.1552886471917594, + "learning_rate": 1.2359273620625438e-05, + "loss": 0.3226723074913025, + "step": 1835 + }, + { + "epoch": 0.9074508834795503, + "grad_norm": 1.095230882373492, + "learning_rate": 1.2351332892075109e-05, + "loss": 0.2895771861076355, + "step": 1836 + }, + { + "epoch": 0.9079451377733844, + "grad_norm": 1.149733162695983, + "learning_rate": 1.234339059380854e-05, + "loss": 0.3316076397895813, + "step": 1837 + }, + { + "epoch": 0.9084393920672186, + "grad_norm": 1.10037368979265, + "learning_rate": 1.2335446731127887e-05, + "loss": 0.29858651757240295, + "step": 1838 + }, + { + "epoch": 0.9089336463610528, + "grad_norm": 1.2759313559643695, + "learning_rate": 1.2327501309336371e-05, + "loss": 0.31340792775154114, + "step": 1839 + }, + { + "epoch": 0.909427900654887, + "grad_norm": 1.038203202123546, + "learning_rate": 1.2319554333738236e-05, + "loss": 0.27344945073127747, + "step": 1840 + }, + { + 
"epoch": 0.9099221549487211, + "grad_norm": 1.1811761633875792, + "learning_rate": 1.2311605809638766e-05, + "loss": 0.27349725365638733, + "step": 1841 + }, + { + "epoch": 0.9104164092425553, + "grad_norm": 1.2931266398373575, + "learning_rate": 1.2303655742344292e-05, + "loss": 0.28933316469192505, + "step": 1842 + }, + { + "epoch": 0.9109106635363895, + "grad_norm": 1.1360201134878805, + "learning_rate": 1.2295704137162158e-05, + "loss": 0.3315466344356537, + "step": 1843 + }, + { + "epoch": 0.9114049178302237, + "grad_norm": 1.3735184410271417, + "learning_rate": 1.2287750999400743e-05, + "loss": 0.3227408528327942, + "step": 1844 + }, + { + "epoch": 0.9118991721240578, + "grad_norm": 1.1237568254849295, + "learning_rate": 1.2279796334369447e-05, + "loss": 0.30476877093315125, + "step": 1845 + }, + { + "epoch": 0.912393426417892, + "grad_norm": 1.1863082805694927, + "learning_rate": 1.2271840147378697e-05, + "loss": 0.29941046237945557, + "step": 1846 + }, + { + "epoch": 0.9128876807117262, + "grad_norm": 1.040665730868043, + "learning_rate": 1.2263882443739923e-05, + "loss": 0.26635122299194336, + "step": 1847 + }, + { + "epoch": 0.9133819350055603, + "grad_norm": 1.2009768589181191, + "learning_rate": 1.2255923228765574e-05, + "loss": 0.32384809851646423, + "step": 1848 + }, + { + "epoch": 0.9138761892993945, + "grad_norm": 1.1005403546735195, + "learning_rate": 1.2247962507769113e-05, + "loss": 0.2830178141593933, + "step": 1849 + }, + { + "epoch": 0.9143704435932287, + "grad_norm": 1.146384025635135, + "learning_rate": 1.2240000286065003e-05, + "loss": 0.32860931754112244, + "step": 1850 + }, + { + "epoch": 0.9148646978870629, + "grad_norm": 1.1448106720128721, + "learning_rate": 1.2232036568968703e-05, + "loss": 0.2820647954940796, + "step": 1851 + }, + { + "epoch": 0.915358952180897, + "grad_norm": 1.2180250787611469, + "learning_rate": 1.2224071361796685e-05, + "loss": 0.3368694484233856, + "step": 1852 + }, + { + "epoch": 0.9158532064747312, + 
"grad_norm": 1.1414152376911786, + "learning_rate": 1.2216104669866405e-05, + "loss": 0.32594096660614014, + "step": 1853 + }, + { + "epoch": 0.9163474607685654, + "grad_norm": 1.129839927585001, + "learning_rate": 1.2208136498496307e-05, + "loss": 0.3383556008338928, + "step": 1854 + }, + { + "epoch": 0.9168417150623996, + "grad_norm": 1.1516691565096748, + "learning_rate": 1.2200166853005837e-05, + "loss": 0.2655363976955414, + "step": 1855 + }, + { + "epoch": 0.9173359693562337, + "grad_norm": 1.067780593954706, + "learning_rate": 1.2192195738715414e-05, + "loss": 0.30512773990631104, + "step": 1856 + }, + { + "epoch": 0.917830223650068, + "grad_norm": 1.3304322049937938, + "learning_rate": 1.2184223160946433e-05, + "loss": 0.34026995301246643, + "step": 1857 + }, + { + "epoch": 0.9183244779439022, + "grad_norm": 1.2079696433735554, + "learning_rate": 1.2176249125021281e-05, + "loss": 0.29324328899383545, + "step": 1858 + }, + { + "epoch": 0.9188187322377364, + "grad_norm": 1.454623275441196, + "learning_rate": 1.2168273636263308e-05, + "loss": 0.3114206790924072, + "step": 1859 + }, + { + "epoch": 0.9193129865315705, + "grad_norm": 1.1301917440411622, + "learning_rate": 1.2160296699996839e-05, + "loss": 0.2829141914844513, + "step": 1860 + }, + { + "epoch": 0.9198072408254047, + "grad_norm": 1.0721269081592821, + "learning_rate": 1.2152318321547156e-05, + "loss": 0.2735600769519806, + "step": 1861 + }, + { + "epoch": 0.9203014951192389, + "grad_norm": 1.0465335380212768, + "learning_rate": 1.2144338506240519e-05, + "loss": 0.3160930573940277, + "step": 1862 + }, + { + "epoch": 0.9207957494130731, + "grad_norm": 1.0735769631967078, + "learning_rate": 1.2136357259404128e-05, + "loss": 0.26677393913269043, + "step": 1863 + }, + { + "epoch": 0.9212900037069072, + "grad_norm": 1.1305004585474958, + "learning_rate": 1.2128374586366159e-05, + "loss": 0.33033064007759094, + "step": 1864 + }, + { + "epoch": 0.9217842580007414, + "grad_norm": 1.1210908469065626, + 
"learning_rate": 1.2120390492455727e-05, + "loss": 0.28271663188934326, + "step": 1865 + }, + { + "epoch": 0.9222785122945756, + "grad_norm": 1.1196923913120616, + "learning_rate": 1.21124049830029e-05, + "loss": 0.3116013705730438, + "step": 1866 + }, + { + "epoch": 0.9227727665884098, + "grad_norm": 1.1258678919425735, + "learning_rate": 1.2104418063338686e-05, + "loss": 0.30614158511161804, + "step": 1867 + }, + { + "epoch": 0.9232670208822439, + "grad_norm": 1.2128311616527454, + "learning_rate": 1.2096429738795041e-05, + "loss": 0.34351983666419983, + "step": 1868 + }, + { + "epoch": 0.9237612751760781, + "grad_norm": 1.2814647055659063, + "learning_rate": 1.2088440014704858e-05, + "loss": 0.31006965041160583, + "step": 1869 + }, + { + "epoch": 0.9242555294699123, + "grad_norm": 1.093225958461299, + "learning_rate": 1.2080448896401964e-05, + "loss": 0.2671147584915161, + "step": 1870 + }, + { + "epoch": 0.9247497837637465, + "grad_norm": 1.2298582810409653, + "learning_rate": 1.207245638922111e-05, + "loss": 0.29123416543006897, + "step": 1871 + }, + { + "epoch": 0.9252440380575806, + "grad_norm": 1.1613532669189326, + "learning_rate": 1.2064462498497984e-05, + "loss": 0.31838539242744446, + "step": 1872 + }, + { + "epoch": 0.9257382923514148, + "grad_norm": 1.1861407153761483, + "learning_rate": 1.205646722956919e-05, + "loss": 0.3158906102180481, + "step": 1873 + }, + { + "epoch": 0.926232546645249, + "grad_norm": 1.2339017273841688, + "learning_rate": 1.2048470587772257e-05, + "loss": 0.3679552674293518, + "step": 1874 + }, + { + "epoch": 0.9267268009390831, + "grad_norm": 1.1210108605660978, + "learning_rate": 1.204047257844563e-05, + "loss": 0.2891008257865906, + "step": 1875 + }, + { + "epoch": 0.9272210552329173, + "grad_norm": 1.1110723692294957, + "learning_rate": 1.2032473206928663e-05, + "loss": 0.3207235634326935, + "step": 1876 + }, + { + "epoch": 0.9277153095267515, + "grad_norm": 1.203189154519193, + "learning_rate": 1.2024472478561624e-05, + 
"loss": 0.2710658311843872, + "step": 1877 + }, + { + "epoch": 0.9282095638205857, + "grad_norm": 1.1156076578026985, + "learning_rate": 1.2016470398685685e-05, + "loss": 0.2554836869239807, + "step": 1878 + }, + { + "epoch": 0.9287038181144198, + "grad_norm": 1.079454168196498, + "learning_rate": 1.2008466972642921e-05, + "loss": 0.2822943329811096, + "step": 1879 + }, + { + "epoch": 0.929198072408254, + "grad_norm": 1.2007950112208574, + "learning_rate": 1.20004622057763e-05, + "loss": 0.3447754681110382, + "step": 1880 + }, + { + "epoch": 0.9296923267020882, + "grad_norm": 1.1885607345269107, + "learning_rate": 1.1992456103429694e-05, + "loss": 0.3009227514266968, + "step": 1881 + }, + { + "epoch": 0.9301865809959224, + "grad_norm": 1.3491102685763696, + "learning_rate": 1.1984448670947863e-05, + "loss": 0.33154594898223877, + "step": 1882 + }, + { + "epoch": 0.9306808352897565, + "grad_norm": 1.7075348805187878, + "learning_rate": 1.1976439913676457e-05, + "loss": 0.32905343174934387, + "step": 1883 + }, + { + "epoch": 0.9311750895835907, + "grad_norm": 1.2010662669423082, + "learning_rate": 1.1968429836962e-05, + "loss": 0.34757447242736816, + "step": 1884 + }, + { + "epoch": 0.931669343877425, + "grad_norm": 1.2626693752273819, + "learning_rate": 1.1960418446151912e-05, + "loss": 0.29980987310409546, + "step": 1885 + }, + { + "epoch": 0.9321635981712592, + "grad_norm": 1.081439601568963, + "learning_rate": 1.1952405746594477e-05, + "loss": 0.3106808662414551, + "step": 1886 + }, + { + "epoch": 0.9326578524650933, + "grad_norm": 1.2465315131717423, + "learning_rate": 1.1944391743638863e-05, + "loss": 0.3222411572933197, + "step": 1887 + }, + { + "epoch": 0.9331521067589275, + "grad_norm": 1.117897007008322, + "learning_rate": 1.1936376442635104e-05, + "loss": 0.3365646302700043, + "step": 1888 + }, + { + "epoch": 0.9336463610527617, + "grad_norm": 1.2223325106102665, + "learning_rate": 1.1928359848934101e-05, + "loss": 0.32500627636909485, + "step": 1889 + }, 
+ { + "epoch": 0.9341406153465959, + "grad_norm": 1.1692844365001853, + "learning_rate": 1.1920341967887614e-05, + "loss": 0.31395500898361206, + "step": 1890 + }, + { + "epoch": 0.93463486964043, + "grad_norm": 1.084320264091655, + "learning_rate": 1.1912322804848268e-05, + "loss": 0.3060624301433563, + "step": 1891 + }, + { + "epoch": 0.9351291239342642, + "grad_norm": 1.042165685734395, + "learning_rate": 1.190430236516954e-05, + "loss": 0.2644454836845398, + "step": 1892 + }, + { + "epoch": 0.9356233782280984, + "grad_norm": 1.2086818065931575, + "learning_rate": 1.1896280654205765e-05, + "loss": 0.33404678106307983, + "step": 1893 + }, + { + "epoch": 0.9361176325219326, + "grad_norm": 1.0362894963118763, + "learning_rate": 1.1888257677312119e-05, + "loss": 0.28557512164115906, + "step": 1894 + }, + { + "epoch": 0.9366118868157667, + "grad_norm": 1.1281245501630466, + "learning_rate": 1.1880233439844623e-05, + "loss": 0.3332308530807495, + "step": 1895 + }, + { + "epoch": 0.9371061411096009, + "grad_norm": 1.0648316720915905, + "learning_rate": 1.1872207947160155e-05, + "loss": 0.3274528384208679, + "step": 1896 + }, + { + "epoch": 0.9376003954034351, + "grad_norm": 1.168900116977035, + "learning_rate": 1.1864181204616404e-05, + "loss": 0.297880083322525, + "step": 1897 + }, + { + "epoch": 0.9380946496972693, + "grad_norm": 1.1118774536365064, + "learning_rate": 1.1856153217571924e-05, + "loss": 0.3404296040534973, + "step": 1898 + }, + { + "epoch": 0.9385889039911034, + "grad_norm": 1.7308625403608067, + "learning_rate": 1.1848123991386073e-05, + "loss": 0.32343849539756775, + "step": 1899 + }, + { + "epoch": 0.9390831582849376, + "grad_norm": 1.0296882466024648, + "learning_rate": 1.1840093531419052e-05, + "loss": 0.26679158210754395, + "step": 1900 + }, + { + "epoch": 0.9395774125787718, + "grad_norm": 1.0019153721179144, + "learning_rate": 1.1832061843031884e-05, + "loss": 0.28106996417045593, + "step": 1901 + }, + { + "epoch": 0.9400716668726059, + 
"grad_norm": 1.1236721425678955, + "learning_rate": 1.1824028931586406e-05, + "loss": 0.28356847167015076, + "step": 1902 + }, + { + "epoch": 0.9405659211664401, + "grad_norm": 1.2443758247501144, + "learning_rate": 1.1815994802445274e-05, + "loss": 0.3256348669528961, + "step": 1903 + }, + { + "epoch": 0.9410601754602743, + "grad_norm": 1.1514727386744015, + "learning_rate": 1.1807959460971958e-05, + "loss": 0.2781906723976135, + "step": 1904 + }, + { + "epoch": 0.9415544297541085, + "grad_norm": 1.2599759308188183, + "learning_rate": 1.1799922912530741e-05, + "loss": 0.3129916787147522, + "step": 1905 + }, + { + "epoch": 0.9420486840479426, + "grad_norm": 1.1353254220103308, + "learning_rate": 1.1791885162486705e-05, + "loss": 0.281986266374588, + "step": 1906 + }, + { + "epoch": 0.9425429383417768, + "grad_norm": 1.2313275172087987, + "learning_rate": 1.1783846216205734e-05, + "loss": 0.33587342500686646, + "step": 1907 + }, + { + "epoch": 0.943037192635611, + "grad_norm": 1.0408682927660702, + "learning_rate": 1.1775806079054522e-05, + "loss": 0.27715635299682617, + "step": 1908 + }, + { + "epoch": 0.9435314469294452, + "grad_norm": 1.1581221243071849, + "learning_rate": 1.1767764756400541e-05, + "loss": 0.3190307915210724, + "step": 1909 + }, + { + "epoch": 0.9440257012232793, + "grad_norm": 1.1962319364965919, + "learning_rate": 1.175972225361207e-05, + "loss": 0.29336807131767273, + "step": 1910 + }, + { + "epoch": 0.9445199555171135, + "grad_norm": 1.1448708364637925, + "learning_rate": 1.1751678576058164e-05, + "loss": 0.3001596927642822, + "step": 1911 + }, + { + "epoch": 0.9450142098109477, + "grad_norm": 1.0832545536390727, + "learning_rate": 1.1743633729108672e-05, + "loss": 0.26952457427978516, + "step": 1912 + }, + { + "epoch": 0.945508464104782, + "grad_norm": 1.166519142960908, + "learning_rate": 1.1735587718134212e-05, + "loss": 0.3193609118461609, + "step": 1913 + }, + { + "epoch": 0.946002718398616, + "grad_norm": 1.2095746348772163, + 
"learning_rate": 1.172754054850619e-05, + "loss": 0.2810664176940918, + "step": 1914 + }, + { + "epoch": 0.9464969726924503, + "grad_norm": 1.1743627712454017, + "learning_rate": 1.1719492225596783e-05, + "loss": 0.28850311040878296, + "step": 1915 + }, + { + "epoch": 0.9469912269862845, + "grad_norm": 1.1739524489187587, + "learning_rate": 1.1711442754778936e-05, + "loss": 0.32268932461738586, + "step": 1916 + }, + { + "epoch": 0.9474854812801187, + "grad_norm": 1.2236575262685914, + "learning_rate": 1.1703392141426356e-05, + "loss": 0.3149149715900421, + "step": 1917 + }, + { + "epoch": 0.9479797355739528, + "grad_norm": 1.0472038436966378, + "learning_rate": 1.1695340390913526e-05, + "loss": 0.2537482678890228, + "step": 1918 + }, + { + "epoch": 0.948473989867787, + "grad_norm": 1.1232208833213926, + "learning_rate": 1.168728750861567e-05, + "loss": 0.2611936330795288, + "step": 1919 + }, + { + "epoch": 0.9489682441616212, + "grad_norm": 1.0077623948815433, + "learning_rate": 1.1679233499908781e-05, + "loss": 0.263653427362442, + "step": 1920 + }, + { + "epoch": 0.9494624984554554, + "grad_norm": 1.1707561168968341, + "learning_rate": 1.1671178370169604e-05, + "loss": 0.3122594952583313, + "step": 1921 + }, + { + "epoch": 0.9499567527492895, + "grad_norm": 1.1924449722361925, + "learning_rate": 1.1663122124775626e-05, + "loss": 0.3101043701171875, + "step": 1922 + }, + { + "epoch": 0.9504510070431237, + "grad_norm": 1.129901320884474, + "learning_rate": 1.1655064769105077e-05, + "loss": 0.295572966337204, + "step": 1923 + }, + { + "epoch": 0.9509452613369579, + "grad_norm": 1.1537509505815167, + "learning_rate": 1.1647006308536937e-05, + "loss": 0.29732125997543335, + "step": 1924 + }, + { + "epoch": 0.951439515630792, + "grad_norm": 1.1914038253365087, + "learning_rate": 1.1638946748450922e-05, + "loss": 0.32320737838745117, + "step": 1925 + }, + { + "epoch": 0.9519337699246262, + "grad_norm": 1.2581984463314084, + "learning_rate": 1.1630886094227471e-05, + 
"loss": 0.3306753933429718, + "step": 1926 + }, + { + "epoch": 0.9524280242184604, + "grad_norm": 1.0367245477692144, + "learning_rate": 1.1622824351247767e-05, + "loss": 0.2368355095386505, + "step": 1927 + }, + { + "epoch": 0.9529222785122946, + "grad_norm": 1.2216253394681036, + "learning_rate": 1.1614761524893715e-05, + "loss": 0.28470784425735474, + "step": 1928 + }, + { + "epoch": 0.9534165328061287, + "grad_norm": 1.1721810384499396, + "learning_rate": 1.160669762054794e-05, + "loss": 0.34468895196914673, + "step": 1929 + }, + { + "epoch": 0.9539107870999629, + "grad_norm": 1.1277795177992218, + "learning_rate": 1.1598632643593787e-05, + "loss": 0.30562442541122437, + "step": 1930 + }, + { + "epoch": 0.9544050413937971, + "grad_norm": 1.2141650113141733, + "learning_rate": 1.159056659941533e-05, + "loss": 0.2861478924751282, + "step": 1931 + }, + { + "epoch": 0.9548992956876313, + "grad_norm": 1.0692532214940453, + "learning_rate": 1.1582499493397332e-05, + "loss": 0.32385969161987305, + "step": 1932 + }, + { + "epoch": 0.9553935499814654, + "grad_norm": 1.173323189937386, + "learning_rate": 1.1574431330925287e-05, + "loss": 0.2935449481010437, + "step": 1933 + }, + { + "epoch": 0.9558878042752996, + "grad_norm": 1.1041433205065538, + "learning_rate": 1.156636211738538e-05, + "loss": 0.29380083084106445, + "step": 1934 + }, + { + "epoch": 0.9563820585691338, + "grad_norm": 1.1455066452691371, + "learning_rate": 1.1558291858164503e-05, + "loss": 0.2957204282283783, + "step": 1935 + }, + { + "epoch": 0.956876312862968, + "grad_norm": 1.084977751415868, + "learning_rate": 1.1550220558650246e-05, + "loss": 0.26402851939201355, + "step": 1936 + }, + { + "epoch": 0.9573705671568021, + "grad_norm": 1.1085858464768976, + "learning_rate": 1.1542148224230897e-05, + "loss": 0.29163527488708496, + "step": 1937 + }, + { + "epoch": 0.9578648214506363, + "grad_norm": 1.2120558942254267, + "learning_rate": 1.1534074860295426e-05, + "loss": 0.302470326423645, + "step": 1938 
+ }, + { + "epoch": 0.9583590757444705, + "grad_norm": 1.1861857419569999, + "learning_rate": 1.15260004722335e-05, + "loss": 0.25946593284606934, + "step": 1939 + }, + { + "epoch": 0.9588533300383048, + "grad_norm": 1.1153985574382288, + "learning_rate": 1.1517925065435457e-05, + "loss": 0.2680559456348419, + "step": 1940 + }, + { + "epoch": 0.9593475843321388, + "grad_norm": 1.2104349484077064, + "learning_rate": 1.1509848645292334e-05, + "loss": 0.2684473991394043, + "step": 1941 + }, + { + "epoch": 0.959841838625973, + "grad_norm": 1.245187124369965, + "learning_rate": 1.1501771217195827e-05, + "loss": 0.2795519232749939, + "step": 1942 + }, + { + "epoch": 0.9603360929198073, + "grad_norm": 1.2532047895072767, + "learning_rate": 1.1493692786538313e-05, + "loss": 0.35209575295448303, + "step": 1943 + }, + { + "epoch": 0.9608303472136415, + "grad_norm": 1.176019791514668, + "learning_rate": 1.1485613358712839e-05, + "loss": 0.3058928847312927, + "step": 1944 + }, + { + "epoch": 0.9613246015074756, + "grad_norm": 1.103375830615649, + "learning_rate": 1.1477532939113112e-05, + "loss": 0.2889159619808197, + "step": 1945 + }, + { + "epoch": 0.9618188558013098, + "grad_norm": 1.175759039350938, + "learning_rate": 1.1469451533133506e-05, + "loss": 0.30782538652420044, + "step": 1946 + }, + { + "epoch": 0.962313110095144, + "grad_norm": 1.1326992133409532, + "learning_rate": 1.1461369146169052e-05, + "loss": 0.3091726005077362, + "step": 1947 + }, + { + "epoch": 0.9628073643889782, + "grad_norm": 1.2061917553730328, + "learning_rate": 1.1453285783615438e-05, + "loss": 0.3287050724029541, + "step": 1948 + }, + { + "epoch": 0.9633016186828123, + "grad_norm": 1.1941959404182023, + "learning_rate": 1.1445201450868998e-05, + "loss": 0.31267625093460083, + "step": 1949 + }, + { + "epoch": 0.9637958729766465, + "grad_norm": 1.1346278168962094, + "learning_rate": 1.1437116153326719e-05, + "loss": 0.30775952339172363, + "step": 1950 + }, + { + "epoch": 0.9642901272704807, + 
"grad_norm": 1.292541938462464, + "learning_rate": 1.142902989638623e-05, + "loss": 0.3825497329235077, + "step": 1951 + }, + { + "epoch": 0.9647843815643148, + "grad_norm": 1.0454710330230295, + "learning_rate": 1.1420942685445801e-05, + "loss": 0.2866062521934509, + "step": 1952 + }, + { + "epoch": 0.965278635858149, + "grad_norm": 1.144633580750803, + "learning_rate": 1.1412854525904335e-05, + "loss": 0.27787062525749207, + "step": 1953 + }, + { + "epoch": 0.9657728901519832, + "grad_norm": 1.1290436448297894, + "learning_rate": 1.1404765423161381e-05, + "loss": 0.302572101354599, + "step": 1954 + }, + { + "epoch": 0.9662671444458174, + "grad_norm": 1.0781086639824042, + "learning_rate": 1.1396675382617097e-05, + "loss": 0.29608359932899475, + "step": 1955 + }, + { + "epoch": 0.9667613987396515, + "grad_norm": 1.1646658995895742, + "learning_rate": 1.1388584409672285e-05, + "loss": 0.28057801723480225, + "step": 1956 + }, + { + "epoch": 0.9672556530334857, + "grad_norm": 1.1188617227766138, + "learning_rate": 1.1380492509728363e-05, + "loss": 0.29628869891166687, + "step": 1957 + }, + { + "epoch": 0.9677499073273199, + "grad_norm": 1.1207660926511307, + "learning_rate": 1.1372399688187365e-05, + "loss": 0.29254984855651855, + "step": 1958 + }, + { + "epoch": 0.9682441616211541, + "grad_norm": 1.10665523309967, + "learning_rate": 1.1364305950451946e-05, + "loss": 0.32925280928611755, + "step": 1959 + }, + { + "epoch": 0.9687384159149882, + "grad_norm": 1.108029328920716, + "learning_rate": 1.1356211301925367e-05, + "loss": 0.3072258234024048, + "step": 1960 + }, + { + "epoch": 0.9692326702088224, + "grad_norm": 1.1133536367191044, + "learning_rate": 1.1348115748011499e-05, + "loss": 0.29737845063209534, + "step": 1961 + }, + { + "epoch": 0.9697269245026566, + "grad_norm": 1.1169451234105505, + "learning_rate": 1.1340019294114822e-05, + "loss": 0.27369949221611023, + "step": 1962 + }, + { + "epoch": 0.9702211787964908, + "grad_norm": 1.2861478922811351, + 
"learning_rate": 1.1331921945640408e-05, + "loss": 0.33116602897644043, + "step": 1963 + }, + { + "epoch": 0.9707154330903249, + "grad_norm": 1.9398235156973715, + "learning_rate": 1.1323823707993937e-05, + "loss": 0.2620438039302826, + "step": 1964 + }, + { + "epoch": 0.9712096873841591, + "grad_norm": 1.1505189829247824, + "learning_rate": 1.1315724586581673e-05, + "loss": 0.3187680244445801, + "step": 1965 + }, + { + "epoch": 0.9717039416779933, + "grad_norm": 1.2391813787863328, + "learning_rate": 1.1307624586810472e-05, + "loss": 0.3675233721733093, + "step": 1966 + }, + { + "epoch": 0.9721981959718276, + "grad_norm": 1.2521490817049854, + "learning_rate": 1.1299523714087784e-05, + "loss": 0.31064945459365845, + "step": 1967 + }, + { + "epoch": 0.9726924502656616, + "grad_norm": 1.1166975993354054, + "learning_rate": 1.1291421973821632e-05, + "loss": 0.2941773235797882, + "step": 1968 + }, + { + "epoch": 0.9731867045594959, + "grad_norm": 1.2565504643296834, + "learning_rate": 1.128331937142062e-05, + "loss": 0.3443846106529236, + "step": 1969 + }, + { + "epoch": 0.9736809588533301, + "grad_norm": 1.1142268279429304, + "learning_rate": 1.1275215912293933e-05, + "loss": 0.2815151810646057, + "step": 1970 + }, + { + "epoch": 0.9741752131471643, + "grad_norm": 1.1622346059327586, + "learning_rate": 1.1267111601851327e-05, + "loss": 0.2886476516723633, + "step": 1971 + }, + { + "epoch": 0.9746694674409984, + "grad_norm": 1.0942194208380682, + "learning_rate": 1.1259006445503116e-05, + "loss": 0.2692835330963135, + "step": 1972 + }, + { + "epoch": 0.9751637217348326, + "grad_norm": 1.1112683317978183, + "learning_rate": 1.1250900448660192e-05, + "loss": 0.2748587727546692, + "step": 1973 + }, + { + "epoch": 0.9756579760286668, + "grad_norm": 1.192989589829818, + "learning_rate": 1.1242793616734002e-05, + "loss": 0.2963098883628845, + "step": 1974 + }, + { + "epoch": 0.976152230322501, + "grad_norm": 1.1305326657315258, + "learning_rate": 1.1234685955136552e-05, + 
"loss": 0.28353193402290344, + "step": 1975 + }, + { + "epoch": 0.9766464846163351, + "grad_norm": 1.1967273051238179, + "learning_rate": 1.1226577469280397e-05, + "loss": 0.3308493494987488, + "step": 1976 + }, + { + "epoch": 0.9771407389101693, + "grad_norm": 1.096933031801606, + "learning_rate": 1.1218468164578653e-05, + "loss": 0.26923754811286926, + "step": 1977 + }, + { + "epoch": 0.9776349932040035, + "grad_norm": 1.5091635403311783, + "learning_rate": 1.1210358046444968e-05, + "loss": 0.2730574905872345, + "step": 1978 + }, + { + "epoch": 0.9781292474978376, + "grad_norm": 1.1338996219219686, + "learning_rate": 1.1202247120293548e-05, + "loss": 0.26464858651161194, + "step": 1979 + }, + { + "epoch": 0.9786235017916718, + "grad_norm": 1.2694994457222093, + "learning_rate": 1.1194135391539127e-05, + "loss": 0.30095499753952026, + "step": 1980 + }, + { + "epoch": 0.979117756085506, + "grad_norm": 1.3227283597348862, + "learning_rate": 1.1186022865596983e-05, + "loss": 0.3418167233467102, + "step": 1981 + }, + { + "epoch": 0.9796120103793402, + "grad_norm": 1.2780598996117225, + "learning_rate": 1.117790954788292e-05, + "loss": 0.28735262155532837, + "step": 1982 + }, + { + "epoch": 0.9801062646731743, + "grad_norm": 1.109707631385258, + "learning_rate": 1.116979544381327e-05, + "loss": 0.26816800236701965, + "step": 1983 + }, + { + "epoch": 0.9806005189670085, + "grad_norm": 1.1873089360962268, + "learning_rate": 1.1161680558804897e-05, + "loss": 0.31004661321640015, + "step": 1984 + }, + { + "epoch": 0.9810947732608427, + "grad_norm": 1.2669673078204273, + "learning_rate": 1.1153564898275184e-05, + "loss": 0.33103084564208984, + "step": 1985 + }, + { + "epoch": 0.9815890275546769, + "grad_norm": 1.3375894512262838, + "learning_rate": 1.1145448467642021e-05, + "loss": 0.3804841637611389, + "step": 1986 + }, + { + "epoch": 0.982083281848511, + "grad_norm": 1.2029739003434823, + "learning_rate": 1.1137331272323834e-05, + "loss": 0.31861352920532227, + "step": 
1987 + }, + { + "epoch": 0.9825775361423452, + "grad_norm": 1.1954996526655464, + "learning_rate": 1.1129213317739539e-05, + "loss": 0.3022298216819763, + "step": 1988 + }, + { + "epoch": 0.9830717904361794, + "grad_norm": 1.3466664334904774, + "learning_rate": 1.1121094609308564e-05, + "loss": 0.38203683495521545, + "step": 1989 + }, + { + "epoch": 0.9835660447300136, + "grad_norm": 1.215882197519198, + "learning_rate": 1.1112975152450848e-05, + "loss": 0.3105717897415161, + "step": 1990 + }, + { + "epoch": 0.9840602990238477, + "grad_norm": 1.2066484647947713, + "learning_rate": 1.1104854952586827e-05, + "loss": 0.31930285692214966, + "step": 1991 + }, + { + "epoch": 0.9845545533176819, + "grad_norm": 1.1639723195264664, + "learning_rate": 1.1096734015137422e-05, + "loss": 0.3167966902256012, + "step": 1992 + }, + { + "epoch": 0.9850488076115161, + "grad_norm": 1.168704133231974, + "learning_rate": 1.1088612345524059e-05, + "loss": 0.2693050801753998, + "step": 1993 + }, + { + "epoch": 0.9855430619053503, + "grad_norm": 1.0985586655404702, + "learning_rate": 1.1080489949168651e-05, + "loss": 0.27986466884613037, + "step": 1994 + }, + { + "epoch": 0.9860373161991844, + "grad_norm": 1.1481757517161775, + "learning_rate": 1.1072366831493589e-05, + "loss": 0.26814526319503784, + "step": 1995 + }, + { + "epoch": 0.9865315704930187, + "grad_norm": 1.146921609246337, + "learning_rate": 1.1064242997921753e-05, + "loss": 0.31393951177597046, + "step": 1996 + }, + { + "epoch": 0.9870258247868529, + "grad_norm": 1.1375630444026625, + "learning_rate": 1.1056118453876496e-05, + "loss": 0.2958461344242096, + "step": 1997 + }, + { + "epoch": 0.9875200790806871, + "grad_norm": 1.137037421352785, + "learning_rate": 1.1047993204781652e-05, + "loss": 0.29744619131088257, + "step": 1998 + }, + { + "epoch": 0.9880143333745212, + "grad_norm": 1.1508003551512254, + "learning_rate": 1.1039867256061516e-05, + "loss": 0.29055094718933105, + "step": 1999 + }, + { + "epoch": 
0.9885085876683554, + "grad_norm": 1.1632161121950038, + "learning_rate": 1.103174061314086e-05, + "loss": 0.29961663484573364, + "step": 2000 + }, + { + "epoch": 0.9890028419621896, + "grad_norm": 1.0841825843818378, + "learning_rate": 1.102361328144491e-05, + "loss": 0.34533610939979553, + "step": 2001 + }, + { + "epoch": 0.9894970962560238, + "grad_norm": 1.1849596678411713, + "learning_rate": 1.1015485266399362e-05, + "loss": 0.2994460463523865, + "step": 2002 + }, + { + "epoch": 0.9899913505498579, + "grad_norm": 1.2325420364808024, + "learning_rate": 1.1007356573430357e-05, + "loss": 0.34309566020965576, + "step": 2003 + }, + { + "epoch": 0.9904856048436921, + "grad_norm": 1.2050309252665437, + "learning_rate": 1.09992272079645e-05, + "loss": 0.3049868643283844, + "step": 2004 + }, + { + "epoch": 0.9909798591375263, + "grad_norm": 1.1759703775328856, + "learning_rate": 1.0991097175428833e-05, + "loss": 0.30586326122283936, + "step": 2005 + }, + { + "epoch": 0.9914741134313604, + "grad_norm": 1.1997965130034223, + "learning_rate": 1.0982966481250854e-05, + "loss": 0.29740482568740845, + "step": 2006 + }, + { + "epoch": 0.9919683677251946, + "grad_norm": 1.2400023524315222, + "learning_rate": 1.0974835130858497e-05, + "loss": 0.3218206465244293, + "step": 2007 + }, + { + "epoch": 0.9924626220190288, + "grad_norm": 1.1309419286206777, + "learning_rate": 1.0966703129680139e-05, + "loss": 0.2747582495212555, + "step": 2008 + }, + { + "epoch": 0.992956876312863, + "grad_norm": 1.2581670135770728, + "learning_rate": 1.0958570483144578e-05, + "loss": 0.33215245604515076, + "step": 2009 + }, + { + "epoch": 0.9934511306066971, + "grad_norm": 1.2834058413633842, + "learning_rate": 1.0950437196681061e-05, + "loss": 0.3149756193161011, + "step": 2010 + }, + { + "epoch": 0.9939453849005313, + "grad_norm": 1.1001136330607295, + "learning_rate": 1.0942303275719253e-05, + "loss": 0.2763513922691345, + "step": 2011 + }, + { + "epoch": 0.9944396391943655, + "grad_norm": 
1.0592905887432897, + "learning_rate": 1.0934168725689239e-05, + "loss": 0.2818325161933899, + "step": 2012 + }, + { + "epoch": 0.9949338934881997, + "grad_norm": 1.1079515754649163, + "learning_rate": 1.0926033552021533e-05, + "loss": 0.2659858167171478, + "step": 2013 + }, + { + "epoch": 0.9954281477820338, + "grad_norm": 1.1926210163358253, + "learning_rate": 1.091789776014706e-05, + "loss": 0.30891451239585876, + "step": 2014 + }, + { + "epoch": 0.995922402075868, + "grad_norm": 1.2194298136031743, + "learning_rate": 1.0909761355497156e-05, + "loss": 0.33645111322402954, + "step": 2015 + }, + { + "epoch": 0.9964166563697022, + "grad_norm": 1.1110546475920504, + "learning_rate": 1.0901624343503571e-05, + "loss": 0.3086194097995758, + "step": 2016 + }, + { + "epoch": 0.9969109106635364, + "grad_norm": 1.0167201052564092, + "learning_rate": 1.089348672959846e-05, + "loss": 0.2614179253578186, + "step": 2017 + }, + { + "epoch": 0.9974051649573705, + "grad_norm": 1.2224853324284848, + "learning_rate": 1.088534851921437e-05, + "loss": 0.3300556540489197, + "step": 2018 + }, + { + "epoch": 0.9978994192512047, + "grad_norm": 1.1929848499106601, + "learning_rate": 1.087720971778426e-05, + "loss": 0.28443643450737, + "step": 2019 + }, + { + "epoch": 0.9983936735450389, + "grad_norm": 1.052677422924197, + "learning_rate": 1.0869070330741475e-05, + "loss": 0.2805534601211548, + "step": 2020 + }, + { + "epoch": 0.9988879278388731, + "grad_norm": 1.065568553175956, + "learning_rate": 1.0860930363519758e-05, + "loss": 0.28186699748039246, + "step": 2021 + }, + { + "epoch": 0.9993821821327072, + "grad_norm": 1.2171160812601536, + "learning_rate": 1.0852789821553228e-05, + "loss": 0.3527688980102539, + "step": 2022 + }, + { + "epoch": 0.9998764364265414, + "grad_norm": 1.2020406854373213, + "learning_rate": 1.08446487102764e-05, + "loss": 0.30708247423171997, + "step": 2023 + }, + { + "epoch": 1.0, + "grad_norm": 2.286184440614986, + "learning_rate": 1.083650703512416e-05, + 
"loss": 0.3015655279159546, + "step": 2024 + }, + { + "epoch": 1.0004942542938342, + "grad_norm": 1.2067651750081223, + "learning_rate": 1.0828364801531777e-05, + "loss": 0.29792484641075134, + "step": 2025 + }, + { + "epoch": 1.0009885085876684, + "grad_norm": 1.1529758757862274, + "learning_rate": 1.0820222014934887e-05, + "loss": 0.27995994687080383, + "step": 2026 + }, + { + "epoch": 1.0014827628815026, + "grad_norm": 1.115022133563525, + "learning_rate": 1.0812078680769501e-05, + "loss": 0.25797444581985474, + "step": 2027 + }, + { + "epoch": 1.0019770171753366, + "grad_norm": 1.1202805963305373, + "learning_rate": 1.0803934804471991e-05, + "loss": 0.2834373116493225, + "step": 2028 + }, + { + "epoch": 1.0024712714691708, + "grad_norm": 1.147731866533824, + "learning_rate": 1.079579039147909e-05, + "loss": 0.27055832743644714, + "step": 2029 + }, + { + "epoch": 1.002965525763005, + "grad_norm": 1.1916483552600579, + "learning_rate": 1.0787645447227897e-05, + "loss": 0.30029311776161194, + "step": 2030 + }, + { + "epoch": 1.0034597800568392, + "grad_norm": 1.1834514894044206, + "learning_rate": 1.0779499977155858e-05, + "loss": 0.2741442322731018, + "step": 2031 + }, + { + "epoch": 1.0039540343506734, + "grad_norm": 1.1233171341295944, + "learning_rate": 1.0771353986700767e-05, + "loss": 0.27097994089126587, + "step": 2032 + }, + { + "epoch": 1.0044482886445076, + "grad_norm": 1.1267943347727831, + "learning_rate": 1.0763207481300781e-05, + "loss": 0.2690125107765198, + "step": 2033 + }, + { + "epoch": 1.0049425429383418, + "grad_norm": 1.1312636860673373, + "learning_rate": 1.0755060466394383e-05, + "loss": 0.29656079411506653, + "step": 2034 + }, + { + "epoch": 1.005436797232176, + "grad_norm": 1.1729529368370135, + "learning_rate": 1.0746912947420407e-05, + "loss": 0.25291675329208374, + "step": 2035 + }, + { + "epoch": 1.00593105152601, + "grad_norm": 1.410951786073956, + "learning_rate": 1.0738764929818017e-05, + "loss": 0.26391562819480896, + "step": 2036 
+ }, + { + "epoch": 1.0064253058198442, + "grad_norm": 1.258204498994485, + "learning_rate": 1.073061641902672e-05, + "loss": 0.2850308418273926, + "step": 2037 + }, + { + "epoch": 1.0069195601136784, + "grad_norm": 1.1368887973206072, + "learning_rate": 1.0722467420486338e-05, + "loss": 0.2529013454914093, + "step": 2038 + }, + { + "epoch": 1.0074138144075127, + "grad_norm": 1.2420233139292696, + "learning_rate": 1.0714317939637028e-05, + "loss": 0.2577154040336609, + "step": 2039 + }, + { + "epoch": 1.0079080687013469, + "grad_norm": 1.1996492314644527, + "learning_rate": 1.0706167981919269e-05, + "loss": 0.28677526116371155, + "step": 2040 + }, + { + "epoch": 1.008402322995181, + "grad_norm": 1.210233649974949, + "learning_rate": 1.0698017552773859e-05, + "loss": 0.25146183371543884, + "step": 2041 + }, + { + "epoch": 1.0088965772890153, + "grad_norm": 1.217205041102825, + "learning_rate": 1.0689866657641899e-05, + "loss": 0.29958251118659973, + "step": 2042 + }, + { + "epoch": 1.0093908315828495, + "grad_norm": 1.2422486891064726, + "learning_rate": 1.0681715301964817e-05, + "loss": 0.28512266278266907, + "step": 2043 + }, + { + "epoch": 1.0098850858766835, + "grad_norm": 1.3312817373132209, + "learning_rate": 1.067356349118434e-05, + "loss": 0.29768145084381104, + "step": 2044 + }, + { + "epoch": 1.0103793401705177, + "grad_norm": 1.2397312600868813, + "learning_rate": 1.0665411230742498e-05, + "loss": 0.25144103169441223, + "step": 2045 + }, + { + "epoch": 1.0108735944643519, + "grad_norm": 1.6026936131359757, + "learning_rate": 1.0657258526081629e-05, + "loss": 0.2673259973526001, + "step": 2046 + }, + { + "epoch": 1.011367848758186, + "grad_norm": 1.2940971813114743, + "learning_rate": 1.0649105382644359e-05, + "loss": 0.2845848500728607, + "step": 2047 + }, + { + "epoch": 1.0118621030520203, + "grad_norm": 1.0898574113835153, + "learning_rate": 1.0640951805873607e-05, + "loss": 0.2569392919540405, + "step": 2048 + }, + { + "epoch": 1.0123563573458545, + 
"grad_norm": 1.2632947550014098, + "learning_rate": 1.0632797801212591e-05, + "loss": 0.250387966632843, + "step": 2049 + }, + { + "epoch": 1.0128506116396887, + "grad_norm": 1.233630096360243, + "learning_rate": 1.0624643374104804e-05, + "loss": 0.28228282928466797, + "step": 2050 + }, + { + "epoch": 1.0133448659335227, + "grad_norm": 1.0888042979148498, + "learning_rate": 1.0616488529994024e-05, + "loss": 0.24724754691123962, + "step": 2051 + }, + { + "epoch": 1.013839120227357, + "grad_norm": 1.2576287774069197, + "learning_rate": 1.0608333274324312e-05, + "loss": 0.268532395362854, + "step": 2052 + }, + { + "epoch": 1.014333374521191, + "grad_norm": 1.1578525571147846, + "learning_rate": 1.0600177612539995e-05, + "loss": 0.27454662322998047, + "step": 2053 + }, + { + "epoch": 1.0148276288150253, + "grad_norm": 1.2050116136682636, + "learning_rate": 1.0592021550085683e-05, + "loss": 0.27497538924217224, + "step": 2054 + }, + { + "epoch": 1.0153218831088595, + "grad_norm": 1.1358282649300115, + "learning_rate": 1.0583865092406237e-05, + "loss": 0.24480152130126953, + "step": 2055 + }, + { + "epoch": 1.0158161374026937, + "grad_norm": 1.1352545460867702, + "learning_rate": 1.0575708244946805e-05, + "loss": 0.23754069209098816, + "step": 2056 + }, + { + "epoch": 1.016310391696528, + "grad_norm": 1.150720407382798, + "learning_rate": 1.056755101315277e-05, + "loss": 0.24541275203227997, + "step": 2057 + }, + { + "epoch": 1.0168046459903621, + "grad_norm": 1.2022551315194179, + "learning_rate": 1.055939340246979e-05, + "loss": 0.27724504470825195, + "step": 2058 + }, + { + "epoch": 1.0172989002841961, + "grad_norm": 1.2400168112160508, + "learning_rate": 1.0551235418343766e-05, + "loss": 0.2869918942451477, + "step": 2059 + }, + { + "epoch": 1.0177931545780303, + "grad_norm": 1.2299839323583324, + "learning_rate": 1.0543077066220854e-05, + "loss": 0.27153679728507996, + "step": 2060 + }, + { + "epoch": 1.0182874088718645, + "grad_norm": 1.1366017541860491, + 
"learning_rate": 1.0534918351547454e-05, + "loss": 0.2611347436904907, + "step": 2061 + }, + { + "epoch": 1.0187816631656987, + "grad_norm": 1.1317421431613228, + "learning_rate": 1.0526759279770202e-05, + "loss": 0.26649200916290283, + "step": 2062 + }, + { + "epoch": 1.019275917459533, + "grad_norm": 1.0930466767865903, + "learning_rate": 1.0518599856335983e-05, + "loss": 0.25164204835891724, + "step": 2063 + }, + { + "epoch": 1.0197701717533671, + "grad_norm": 1.2027289451385044, + "learning_rate": 1.0510440086691911e-05, + "loss": 0.288251131772995, + "step": 2064 + }, + { + "epoch": 1.0202644260472014, + "grad_norm": 1.2837951062377317, + "learning_rate": 1.0502279976285325e-05, + "loss": 0.27177444100379944, + "step": 2065 + }, + { + "epoch": 1.0207586803410356, + "grad_norm": 1.222948820556725, + "learning_rate": 1.0494119530563812e-05, + "loss": 0.2723502218723297, + "step": 2066 + }, + { + "epoch": 1.0212529346348695, + "grad_norm": 1.214398839170698, + "learning_rate": 1.0485958754975156e-05, + "loss": 0.2704971432685852, + "step": 2067 + }, + { + "epoch": 1.0217471889287038, + "grad_norm": 1.267114179641731, + "learning_rate": 1.0477797654967376e-05, + "loss": 0.30302050709724426, + "step": 2068 + }, + { + "epoch": 1.022241443222538, + "grad_norm": 1.268227752862744, + "learning_rate": 1.0469636235988711e-05, + "loss": 0.26408523321151733, + "step": 2069 + }, + { + "epoch": 1.0227356975163722, + "grad_norm": 1.2197627847133865, + "learning_rate": 1.0461474503487606e-05, + "loss": 0.2691786289215088, + "step": 2070 + }, + { + "epoch": 1.0232299518102064, + "grad_norm": 1.2792531550605064, + "learning_rate": 1.0453312462912714e-05, + "loss": 0.2823137640953064, + "step": 2071 + }, + { + "epoch": 1.0237242061040406, + "grad_norm": 1.2027503273852609, + "learning_rate": 1.04451501197129e-05, + "loss": 0.28837013244628906, + "step": 2072 + }, + { + "epoch": 1.0242184603978748, + "grad_norm": 1.27109994402604, + "learning_rate": 1.0436987479337229e-05, + 
"loss": 0.2809562683105469, + "step": 2073 + }, + { + "epoch": 1.024712714691709, + "grad_norm": 1.240431430170138, + "learning_rate": 1.0428824547234956e-05, + "loss": 0.2604525685310364, + "step": 2074 + }, + { + "epoch": 1.025206968985543, + "grad_norm": 1.1799966275921325, + "learning_rate": 1.0420661328855546e-05, + "loss": 0.24755606055259705, + "step": 2075 + }, + { + "epoch": 1.0257012232793772, + "grad_norm": 1.148092531592558, + "learning_rate": 1.0412497829648642e-05, + "loss": 0.2592730224132538, + "step": 2076 + }, + { + "epoch": 1.0261954775732114, + "grad_norm": 1.2356689091758393, + "learning_rate": 1.0404334055064083e-05, + "loss": 0.2693594694137573, + "step": 2077 + }, + { + "epoch": 1.0266897318670456, + "grad_norm": 1.2195187999450414, + "learning_rate": 1.0396170010551881e-05, + "loss": 0.2712753117084503, + "step": 2078 + }, + { + "epoch": 1.0271839861608798, + "grad_norm": 1.1741285828383992, + "learning_rate": 1.0388005701562245e-05, + "loss": 0.2693077027797699, + "step": 2079 + }, + { + "epoch": 1.027678240454714, + "grad_norm": 1.2670826968894364, + "learning_rate": 1.0379841133545544e-05, + "loss": 0.2791144847869873, + "step": 2080 + }, + { + "epoch": 1.0281724947485482, + "grad_norm": 1.163594554813514, + "learning_rate": 1.037167631195233e-05, + "loss": 0.27496254444122314, + "step": 2081 + }, + { + "epoch": 1.0286667490423822, + "grad_norm": 1.1305894692188725, + "learning_rate": 1.0363511242233322e-05, + "loss": 0.26037347316741943, + "step": 2082 + }, + { + "epoch": 1.0291610033362164, + "grad_norm": 1.2085934995349474, + "learning_rate": 1.0355345929839402e-05, + "loss": 0.2610514760017395, + "step": 2083 + }, + { + "epoch": 1.0296552576300506, + "grad_norm": 1.1531883738354434, + "learning_rate": 1.0347180380221618e-05, + "loss": 0.24750857055187225, + "step": 2084 + }, + { + "epoch": 1.0301495119238848, + "grad_norm": 1.2017075670935908, + "learning_rate": 1.0339014598831169e-05, + "loss": 0.2835415303707123, + "step": 2085 + 
}, + { + "epoch": 1.030643766217719, + "grad_norm": 1.2153811049556569, + "learning_rate": 1.033084859111942e-05, + "loss": 0.25762057304382324, + "step": 2086 + }, + { + "epoch": 1.0311380205115532, + "grad_norm": 1.3245241554987517, + "learning_rate": 1.032268236253788e-05, + "loss": 0.2818237841129303, + "step": 2087 + }, + { + "epoch": 1.0316322748053874, + "grad_norm": 1.2402911628462394, + "learning_rate": 1.0314515918538202e-05, + "loss": 0.27192944288253784, + "step": 2088 + }, + { + "epoch": 1.0321265290992216, + "grad_norm": 1.1715597954552734, + "learning_rate": 1.0306349264572195e-05, + "loss": 0.3002319931983948, + "step": 2089 + }, + { + "epoch": 1.0326207833930556, + "grad_norm": 1.221598051409306, + "learning_rate": 1.0298182406091794e-05, + "loss": 0.27106401324272156, + "step": 2090 + }, + { + "epoch": 1.0331150376868898, + "grad_norm": 1.2123644146814079, + "learning_rate": 1.0290015348549076e-05, + "loss": 0.2740558385848999, + "step": 2091 + }, + { + "epoch": 1.033609291980724, + "grad_norm": 1.2394453454529126, + "learning_rate": 1.0281848097396261e-05, + "loss": 0.2970008850097656, + "step": 2092 + }, + { + "epoch": 1.0341035462745582, + "grad_norm": 1.2003549808286662, + "learning_rate": 1.027368065808568e-05, + "loss": 0.27684125304222107, + "step": 2093 + }, + { + "epoch": 1.0345978005683925, + "grad_norm": 1.1371538472805924, + "learning_rate": 1.0265513036069803e-05, + "loss": 0.2732700705528259, + "step": 2094 + }, + { + "epoch": 1.0350920548622267, + "grad_norm": 1.1448190493490698, + "learning_rate": 1.0257345236801215e-05, + "loss": 0.25189805030822754, + "step": 2095 + }, + { + "epoch": 1.0355863091560609, + "grad_norm": 1.1221327830153236, + "learning_rate": 1.0249177265732629e-05, + "loss": 0.3177054524421692, + "step": 2096 + }, + { + "epoch": 1.036080563449895, + "grad_norm": 1.0492479192600686, + "learning_rate": 1.0241009128316854e-05, + "loss": 0.23350921273231506, + "step": 2097 + }, + { + "epoch": 1.036574817743729, + 
"grad_norm": 1.2565303796372052, + "learning_rate": 1.0232840830006832e-05, + "loss": 0.3011140525341034, + "step": 2098 + }, + { + "epoch": 1.0370690720375633, + "grad_norm": 1.164329016307231, + "learning_rate": 1.0224672376255598e-05, + "loss": 0.2578561305999756, + "step": 2099 + }, + { + "epoch": 1.0375633263313975, + "grad_norm": 1.1701632763887444, + "learning_rate": 1.0216503772516297e-05, + "loss": 0.2622804045677185, + "step": 2100 + }, + { + "epoch": 1.0380575806252317, + "grad_norm": 1.219987069304434, + "learning_rate": 1.0208335024242169e-05, + "loss": 0.2662869691848755, + "step": 2101 + }, + { + "epoch": 1.0385518349190659, + "grad_norm": 1.2303351498865798, + "learning_rate": 1.0200166136886558e-05, + "loss": 0.27084922790527344, + "step": 2102 + }, + { + "epoch": 1.0390460892129, + "grad_norm": 1.2434849653646893, + "learning_rate": 1.0191997115902891e-05, + "loss": 0.26290780305862427, + "step": 2103 + }, + { + "epoch": 1.0395403435067343, + "grad_norm": 1.192171896111284, + "learning_rate": 1.0183827966744694e-05, + "loss": 0.27367106080055237, + "step": 2104 + }, + { + "epoch": 1.0400345978005685, + "grad_norm": 1.2706879657010888, + "learning_rate": 1.0175658694865574e-05, + "loss": 0.28507113456726074, + "step": 2105 + }, + { + "epoch": 1.0405288520944025, + "grad_norm": 1.2299041683114893, + "learning_rate": 1.0167489305719221e-05, + "loss": 0.2533179521560669, + "step": 2106 + }, + { + "epoch": 1.0410231063882367, + "grad_norm": 1.2546449586851505, + "learning_rate": 1.0159319804759398e-05, + "loss": 0.28755924105644226, + "step": 2107 + }, + { + "epoch": 1.041517360682071, + "grad_norm": 1.1726176332749902, + "learning_rate": 1.015115019743995e-05, + "loss": 0.26722773909568787, + "step": 2108 + }, + { + "epoch": 1.042011614975905, + "grad_norm": 1.3986075029095133, + "learning_rate": 1.0142980489214788e-05, + "loss": 0.3122308850288391, + "step": 2109 + }, + { + "epoch": 1.0425058692697393, + "grad_norm": 1.1273960807987882, + 
"learning_rate": 1.0134810685537899e-05, + "loss": 0.22603261470794678, + "step": 2110 + }, + { + "epoch": 1.0430001235635735, + "grad_norm": 1.1517998097919544, + "learning_rate": 1.0126640791863316e-05, + "loss": 0.2823299169540405, + "step": 2111 + }, + { + "epoch": 1.0434943778574077, + "grad_norm": 1.3191906526904469, + "learning_rate": 1.0118470813645156e-05, + "loss": 0.30999040603637695, + "step": 2112 + }, + { + "epoch": 1.0439886321512417, + "grad_norm": 1.1820148857556874, + "learning_rate": 1.0110300756337569e-05, + "loss": 0.266022264957428, + "step": 2113 + }, + { + "epoch": 1.044482886445076, + "grad_norm": 1.6608098375974347, + "learning_rate": 1.0102130625394776e-05, + "loss": 0.2674095034599304, + "step": 2114 + }, + { + "epoch": 1.0449771407389101, + "grad_norm": 1.2172826939531747, + "learning_rate": 1.0093960426271037e-05, + "loss": 0.30045652389526367, + "step": 2115 + }, + { + "epoch": 1.0454713950327443, + "grad_norm": 1.1782919874699391, + "learning_rate": 1.0085790164420659e-05, + "loss": 0.28455668687820435, + "step": 2116 + }, + { + "epoch": 1.0459656493265785, + "grad_norm": 1.1749948852757104, + "learning_rate": 1.0077619845297992e-05, + "loss": 0.2429066300392151, + "step": 2117 + }, + { + "epoch": 1.0464599036204127, + "grad_norm": 1.1453766958637177, + "learning_rate": 1.0069449474357427e-05, + "loss": 0.2515121102333069, + "step": 2118 + }, + { + "epoch": 1.046954157914247, + "grad_norm": 1.234414346344525, + "learning_rate": 1.0061279057053385e-05, + "loss": 0.30011802911758423, + "step": 2119 + }, + { + "epoch": 1.0474484122080812, + "grad_norm": 1.1997300836338318, + "learning_rate": 1.005310859884032e-05, + "loss": 0.2577645480632782, + "step": 2120 + }, + { + "epoch": 1.0479426665019151, + "grad_norm": 1.0391250618888572, + "learning_rate": 1.0044938105172713e-05, + "loss": 0.21476465463638306, + "step": 2121 + }, + { + "epoch": 1.0484369207957493, + "grad_norm": 1.3902782329860977, + "learning_rate": 1.0036767581505067e-05, + 
"loss": 0.2587023079395294, + "step": 2122 + }, + { + "epoch": 1.0489311750895836, + "grad_norm": 1.1311469001510768, + "learning_rate": 1.0028597033291911e-05, + "loss": 0.2537185251712799, + "step": 2123 + }, + { + "epoch": 1.0494254293834178, + "grad_norm": 1.0410406857423857, + "learning_rate": 1.0020426465987782e-05, + "loss": 0.24486014246940613, + "step": 2124 + }, + { + "epoch": 1.049919683677252, + "grad_norm": 1.4376390907817962, + "learning_rate": 1.0012255885047241e-05, + "loss": 0.2728436589241028, + "step": 2125 + }, + { + "epoch": 1.0504139379710862, + "grad_norm": 1.3186765660198476, + "learning_rate": 1.0004085295924843e-05, + "loss": 0.30238842964172363, + "step": 2126 + }, + { + "epoch": 1.0509081922649204, + "grad_norm": 1.2910923396564535, + "learning_rate": 9.99591470407516e-06, + "loss": 0.30347609519958496, + "step": 2127 + }, + { + "epoch": 1.0514024465587544, + "grad_norm": 1.2188667375190219, + "learning_rate": 9.987744114952764e-06, + "loss": 0.2581411302089691, + "step": 2128 + }, + { + "epoch": 1.0518967008525886, + "grad_norm": 1.2560629408792487, + "learning_rate": 9.979573534012218e-06, + "loss": 0.239881694316864, + "step": 2129 + }, + { + "epoch": 1.0523909551464228, + "grad_norm": 1.2977893982324902, + "learning_rate": 9.971402966708092e-06, + "loss": 0.3058615028858185, + "step": 2130 + }, + { + "epoch": 1.052885209440257, + "grad_norm": 1.2842102843103194, + "learning_rate": 9.963232418494936e-06, + "loss": 0.25285837054252625, + "step": 2131 + }, + { + "epoch": 1.0533794637340912, + "grad_norm": 1.2217652802535364, + "learning_rate": 9.955061894827294e-06, + "loss": 0.27366510033607483, + "step": 2132 + }, + { + "epoch": 1.0538737180279254, + "grad_norm": 1.1489983530266883, + "learning_rate": 9.946891401159683e-06, + "loss": 0.22268086671829224, + "step": 2133 + }, + { + "epoch": 1.0543679723217596, + "grad_norm": 1.1461059074650484, + "learning_rate": 9.938720942946616e-06, + "loss": 0.2540682554244995, + "step": 2134 + }, + 
{ + "epoch": 1.0548622266155938, + "grad_norm": 1.2357731632052622, + "learning_rate": 9.930550525642576e-06, + "loss": 0.262179970741272, + "step": 2135 + }, + { + "epoch": 1.0553564809094278, + "grad_norm": 1.2267299487839205, + "learning_rate": 9.92238015470201e-06, + "loss": 0.25471946597099304, + "step": 2136 + }, + { + "epoch": 1.055850735203262, + "grad_norm": 1.162352058446371, + "learning_rate": 9.914209835579344e-06, + "loss": 0.2580556571483612, + "step": 2137 + }, + { + "epoch": 1.0563449894970962, + "grad_norm": 1.261401071852413, + "learning_rate": 9.906039573728964e-06, + "loss": 0.29909616708755493, + "step": 2138 + }, + { + "epoch": 1.0568392437909304, + "grad_norm": 1.2162562018595562, + "learning_rate": 9.897869374605226e-06, + "loss": 0.2828724980354309, + "step": 2139 + }, + { + "epoch": 1.0573334980847646, + "grad_norm": 1.2076714268656592, + "learning_rate": 9.889699243662433e-06, + "loss": 0.26731711626052856, + "step": 2140 + }, + { + "epoch": 1.0578277523785988, + "grad_norm": 1.2666827338430986, + "learning_rate": 9.88152918635485e-06, + "loss": 0.2912555932998657, + "step": 2141 + }, + { + "epoch": 1.058322006672433, + "grad_norm": 1.1593053736993435, + "learning_rate": 9.873359208136685e-06, + "loss": 0.2335313856601715, + "step": 2142 + }, + { + "epoch": 1.0588162609662672, + "grad_norm": 1.2934128795704303, + "learning_rate": 9.865189314462105e-06, + "loss": 0.2716987729072571, + "step": 2143 + }, + { + "epoch": 1.0593105152601012, + "grad_norm": 1.3251488161911162, + "learning_rate": 9.857019510785215e-06, + "loss": 0.2919968068599701, + "step": 2144 + }, + { + "epoch": 1.0598047695539354, + "grad_norm": 1.197230535187453, + "learning_rate": 9.848849802560057e-06, + "loss": 0.26279503107070923, + "step": 2145 + }, + { + "epoch": 1.0602990238477696, + "grad_norm": 1.263871154668556, + "learning_rate": 9.840680195240606e-06, + "loss": 0.31622597575187683, + "step": 2146 + }, + { + "epoch": 1.0607932781416038, + "grad_norm": 
1.270948260835911, + "learning_rate": 9.832510694280782e-06, + "loss": 0.2399556040763855, + "step": 2147 + }, + { + "epoch": 1.061287532435438, + "grad_norm": 1.2181574543701559, + "learning_rate": 9.824341305134428e-06, + "loss": 0.2650333046913147, + "step": 2148 + }, + { + "epoch": 1.0617817867292723, + "grad_norm": 1.274348887888969, + "learning_rate": 9.816172033255307e-06, + "loss": 0.26629161834716797, + "step": 2149 + }, + { + "epoch": 1.0622760410231065, + "grad_norm": 1.2611051957138737, + "learning_rate": 9.808002884097109e-06, + "loss": 0.28042545914649963, + "step": 2150 + }, + { + "epoch": 1.0627702953169407, + "grad_norm": 1.1495131020915084, + "learning_rate": 9.799833863113445e-06, + "loss": 0.24374082684516907, + "step": 2151 + }, + { + "epoch": 1.0632645496107747, + "grad_norm": 1.1048551979398207, + "learning_rate": 9.791664975757835e-06, + "loss": 0.23013898730278015, + "step": 2152 + }, + { + "epoch": 1.0637588039046089, + "grad_norm": 1.4072884886903234, + "learning_rate": 9.783496227483706e-06, + "loss": 0.25313276052474976, + "step": 2153 + }, + { + "epoch": 1.064253058198443, + "grad_norm": 1.248155174046862, + "learning_rate": 9.775327623744403e-06, + "loss": 0.2642362713813782, + "step": 2154 + }, + { + "epoch": 1.0647473124922773, + "grad_norm": 1.1405325090848468, + "learning_rate": 9.76715916999317e-06, + "loss": 0.2417108118534088, + "step": 2155 + }, + { + "epoch": 1.0652415667861115, + "grad_norm": 1.2556215450887547, + "learning_rate": 9.758990871683148e-06, + "loss": 0.25653502345085144, + "step": 2156 + }, + { + "epoch": 1.0657358210799457, + "grad_norm": 1.22877547041534, + "learning_rate": 9.750822734267378e-06, + "loss": 0.247604638338089, + "step": 2157 + }, + { + "epoch": 1.06623007537378, + "grad_norm": 1.2330600407976389, + "learning_rate": 9.742654763198786e-06, + "loss": 0.2675636112689972, + "step": 2158 + }, + { + "epoch": 1.0667243296676139, + "grad_norm": 1.230290211943024, + "learning_rate": 9.7344869639302e-06, + 
"loss": 0.2570686340332031, + "step": 2159 + }, + { + "epoch": 1.067218583961448, + "grad_norm": 1.4290278531414855, + "learning_rate": 9.726319341914323e-06, + "loss": 0.3046165704727173, + "step": 2160 + }, + { + "epoch": 1.0677128382552823, + "grad_norm": 1.3759048148010737, + "learning_rate": 9.718151902603744e-06, + "loss": 0.24278515577316284, + "step": 2161 + }, + { + "epoch": 1.0682070925491165, + "grad_norm": 1.235098490769484, + "learning_rate": 9.709984651450924e-06, + "loss": 0.2565615773200989, + "step": 2162 + }, + { + "epoch": 1.0687013468429507, + "grad_norm": 1.3303607886608886, + "learning_rate": 9.701817593908209e-06, + "loss": 0.2672972083091736, + "step": 2163 + }, + { + "epoch": 1.069195601136785, + "grad_norm": 1.1620974642583077, + "learning_rate": 9.693650735427808e-06, + "loss": 0.21376445889472961, + "step": 2164 + }, + { + "epoch": 1.0696898554306191, + "grad_norm": 1.2628274098639385, + "learning_rate": 9.685484081461802e-06, + "loss": 0.27743393182754517, + "step": 2165 + }, + { + "epoch": 1.0701841097244533, + "grad_norm": 1.3615817033316626, + "learning_rate": 9.677317637462125e-06, + "loss": 0.2747134566307068, + "step": 2166 + }, + { + "epoch": 1.0706783640182873, + "grad_norm": 1.1533673233774355, + "learning_rate": 9.669151408880581e-06, + "loss": 0.2775312066078186, + "step": 2167 + }, + { + "epoch": 1.0711726183121215, + "grad_norm": 1.392383813550365, + "learning_rate": 9.660985401168833e-06, + "loss": 0.2743167281150818, + "step": 2168 + }, + { + "epoch": 1.0716668726059557, + "grad_norm": 1.1731022030570613, + "learning_rate": 9.652819619778387e-06, + "loss": 0.26030686497688293, + "step": 2169 + }, + { + "epoch": 1.07216112689979, + "grad_norm": 1.2886350622041207, + "learning_rate": 9.644654070160603e-06, + "loss": 0.32307812571525574, + "step": 2170 + }, + { + "epoch": 1.0726553811936241, + "grad_norm": 1.309807945595821, + "learning_rate": 9.63648875776668e-06, + "loss": 0.2773011028766632, + "step": 2171 + }, + { + 
"epoch": 1.0731496354874583, + "grad_norm": 1.3767412291020849, + "learning_rate": 9.628323688047672e-06, + "loss": 0.27996528148651123, + "step": 2172 + }, + { + "epoch": 1.0736438897812925, + "grad_norm": 1.176261909375135, + "learning_rate": 9.620158866454459e-06, + "loss": 0.28022176027297974, + "step": 2173 + }, + { + "epoch": 1.0741381440751268, + "grad_norm": 1.1746327357052728, + "learning_rate": 9.61199429843776e-06, + "loss": 0.2688876986503601, + "step": 2174 + }, + { + "epoch": 1.0746323983689607, + "grad_norm": 1.1454924799354713, + "learning_rate": 9.60382998944812e-06, + "loss": 0.23915211856365204, + "step": 2175 + }, + { + "epoch": 1.075126652662795, + "grad_norm": 1.1770664027196904, + "learning_rate": 9.59566594493592e-06, + "loss": 0.2533806264400482, + "step": 2176 + }, + { + "epoch": 1.0756209069566292, + "grad_norm": 1.2321355277799408, + "learning_rate": 9.587502170351361e-06, + "loss": 0.2887522876262665, + "step": 2177 + }, + { + "epoch": 1.0761151612504634, + "grad_norm": 1.2169372388289537, + "learning_rate": 9.579338671144459e-06, + "loss": 0.2885408401489258, + "step": 2178 + }, + { + "epoch": 1.0766094155442976, + "grad_norm": 1.2209492195717289, + "learning_rate": 9.571175452765045e-06, + "loss": 0.25656914710998535, + "step": 2179 + }, + { + "epoch": 1.0771036698381318, + "grad_norm": 1.2669016448608037, + "learning_rate": 9.563012520662773e-06, + "loss": 0.2935143709182739, + "step": 2180 + }, + { + "epoch": 1.077597924131966, + "grad_norm": 1.2902152081672096, + "learning_rate": 9.554849880287103e-06, + "loss": 0.26728200912475586, + "step": 2181 + }, + { + "epoch": 1.0780921784258002, + "grad_norm": 1.4327778934971358, + "learning_rate": 9.546687537087287e-06, + "loss": 0.2558351159095764, + "step": 2182 + }, + { + "epoch": 1.0785864327196342, + "grad_norm": 1.133861673349663, + "learning_rate": 9.538525496512394e-06, + "loss": 0.2517240047454834, + "step": 2183 + }, + { + "epoch": 1.0790806870134684, + "grad_norm": 
1.1033603168250732, + "learning_rate": 9.53036376401129e-06, + "loss": 0.23258647322654724, + "step": 2184 + }, + { + "epoch": 1.0795749413073026, + "grad_norm": 1.2016172891455823, + "learning_rate": 9.522202345032627e-06, + "loss": 0.24100016057491302, + "step": 2185 + }, + { + "epoch": 1.0800691956011368, + "grad_norm": 1.1844138198826075, + "learning_rate": 9.51404124502485e-06, + "loss": 0.27807697653770447, + "step": 2186 + }, + { + "epoch": 1.080563449894971, + "grad_norm": 1.2045646158236256, + "learning_rate": 9.50588046943619e-06, + "loss": 0.26146867871284485, + "step": 2187 + }, + { + "epoch": 1.0810577041888052, + "grad_norm": 1.3792610621050578, + "learning_rate": 9.497720023714675e-06, + "loss": 0.28570955991744995, + "step": 2188 + }, + { + "epoch": 1.0815519584826394, + "grad_norm": 1.146591161630138, + "learning_rate": 9.489559913308092e-06, + "loss": 0.22583246231079102, + "step": 2189 + }, + { + "epoch": 1.0820462127764734, + "grad_norm": 1.2292468406383597, + "learning_rate": 9.48140014366402e-06, + "loss": 0.27526232600212097, + "step": 2190 + }, + { + "epoch": 1.0825404670703076, + "grad_norm": 1.287410242270342, + "learning_rate": 9.473240720229803e-06, + "loss": 0.2777514159679413, + "step": 2191 + }, + { + "epoch": 1.0830347213641418, + "grad_norm": 1.217692620890676, + "learning_rate": 9.465081648452549e-06, + "loss": 0.25767001509666443, + "step": 2192 + }, + { + "epoch": 1.083528975657976, + "grad_norm": 1.2401214064051047, + "learning_rate": 9.456922933779148e-06, + "loss": 0.24114865064620972, + "step": 2193 + }, + { + "epoch": 1.0840232299518102, + "grad_norm": 1.3343620945353547, + "learning_rate": 9.448764581656237e-06, + "loss": 0.31198200583457947, + "step": 2194 + }, + { + "epoch": 1.0845174842456444, + "grad_norm": 1.2865355942160217, + "learning_rate": 9.440606597530213e-06, + "loss": 0.2724478840827942, + "step": 2195 + }, + { + "epoch": 1.0850117385394786, + "grad_norm": 1.2982367761916904, + "learning_rate": 
9.432448986847229e-06, + "loss": 0.27796900272369385, + "step": 2196 + }, + { + "epoch": 1.0855059928333128, + "grad_norm": 1.293883522594156, + "learning_rate": 9.424291755053198e-06, + "loss": 0.2877587676048279, + "step": 2197 + }, + { + "epoch": 1.0860002471271468, + "grad_norm": 1.354561961211439, + "learning_rate": 9.416134907593764e-06, + "loss": 0.2898337244987488, + "step": 2198 + }, + { + "epoch": 1.086494501420981, + "grad_norm": 1.2931825621227928, + "learning_rate": 9.407978449914322e-06, + "loss": 0.2544672191143036, + "step": 2199 + }, + { + "epoch": 1.0869887557148152, + "grad_norm": 1.2905943399481439, + "learning_rate": 9.399822387460005e-06, + "loss": 0.28336071968078613, + "step": 2200 + }, + { + "epoch": 1.0874830100086494, + "grad_norm": 1.2871287196611743, + "learning_rate": 9.391666725675691e-06, + "loss": 0.2862734794616699, + "step": 2201 + }, + { + "epoch": 1.0879772643024836, + "grad_norm": 1.386969000020192, + "learning_rate": 9.383511470005978e-06, + "loss": 0.26331260800361633, + "step": 2202 + }, + { + "epoch": 1.0884715185963179, + "grad_norm": 1.2750467510922643, + "learning_rate": 9.375356625895201e-06, + "loss": 0.30087417364120483, + "step": 2203 + }, + { + "epoch": 1.088965772890152, + "grad_norm": 1.3434362766675538, + "learning_rate": 9.36720219878741e-06, + "loss": 0.2736594080924988, + "step": 2204 + }, + { + "epoch": 1.089460027183986, + "grad_norm": 1.4852243291487657, + "learning_rate": 9.359048194126395e-06, + "loss": 0.2704418897628784, + "step": 2205 + }, + { + "epoch": 1.0899542814778203, + "grad_norm": 1.2230094225693318, + "learning_rate": 9.350894617355645e-06, + "loss": 0.24540236592292786, + "step": 2206 + }, + { + "epoch": 1.0904485357716545, + "grad_norm": 1.2299505503288506, + "learning_rate": 9.342741473918375e-06, + "loss": 0.26376527547836304, + "step": 2207 + }, + { + "epoch": 1.0909427900654887, + "grad_norm": 1.0803859595224048, + "learning_rate": 9.334588769257502e-06, + "loss": 0.24062004685401917, + 
"step": 2208 + }, + { + "epoch": 1.0914370443593229, + "grad_norm": 1.1443970874822365, + "learning_rate": 9.326436508815662e-06, + "loss": 0.24209418892860413, + "step": 2209 + }, + { + "epoch": 1.091931298653157, + "grad_norm": 1.3414968412819865, + "learning_rate": 9.318284698035188e-06, + "loss": 0.2732285261154175, + "step": 2210 + }, + { + "epoch": 1.0924255529469913, + "grad_norm": 1.2470429271312866, + "learning_rate": 9.310133342358106e-06, + "loss": 0.2684158980846405, + "step": 2211 + }, + { + "epoch": 1.0929198072408255, + "grad_norm": 1.1035267199988392, + "learning_rate": 9.301982447226145e-06, + "loss": 0.22511601448059082, + "step": 2212 + }, + { + "epoch": 1.0934140615346597, + "grad_norm": 1.165505029883992, + "learning_rate": 9.293832018080731e-06, + "loss": 0.2622867226600647, + "step": 2213 + }, + { + "epoch": 1.0939083158284937, + "grad_norm": 1.2923685951682604, + "learning_rate": 9.285682060362974e-06, + "loss": 0.3030891418457031, + "step": 2214 + }, + { + "epoch": 1.094402570122328, + "grad_norm": 1.2523210407583818, + "learning_rate": 9.277532579513666e-06, + "loss": 0.24928592145442963, + "step": 2215 + }, + { + "epoch": 1.094896824416162, + "grad_norm": 1.2048717570746186, + "learning_rate": 9.269383580973285e-06, + "loss": 0.2588339149951935, + "step": 2216 + }, + { + "epoch": 1.0953910787099963, + "grad_norm": 1.2427748942142012, + "learning_rate": 9.261235070181983e-06, + "loss": 0.2587873339653015, + "step": 2217 + }, + { + "epoch": 1.0958853330038305, + "grad_norm": 1.3192410250632676, + "learning_rate": 9.253087052579596e-06, + "loss": 0.29420971870422363, + "step": 2218 + }, + { + "epoch": 1.0963795872976647, + "grad_norm": 1.1714489078180652, + "learning_rate": 9.244939533605619e-06, + "loss": 0.25384342670440674, + "step": 2219 + }, + { + "epoch": 1.096873841591499, + "grad_norm": 1.2208998726962157, + "learning_rate": 9.236792518699224e-06, + "loss": 0.23133251070976257, + "step": 2220 + }, + { + "epoch": 1.097368095885333, + 
"grad_norm": 1.1919788928879418, + "learning_rate": 9.228646013299233e-06, + "loss": 0.26196008920669556, + "step": 2221 + }, + { + "epoch": 1.0978623501791671, + "grad_norm": 1.345065700534229, + "learning_rate": 9.220500022844144e-06, + "loss": 0.2567690908908844, + "step": 2222 + }, + { + "epoch": 1.0983566044730013, + "grad_norm": 1.1808254692787845, + "learning_rate": 9.212354552772107e-06, + "loss": 0.2555367350578308, + "step": 2223 + }, + { + "epoch": 1.0988508587668355, + "grad_norm": 1.1544608952675586, + "learning_rate": 9.204209608520913e-06, + "loss": 0.24357245862483978, + "step": 2224 + }, + { + "epoch": 1.0993451130606697, + "grad_norm": 1.3367524689374175, + "learning_rate": 9.19606519552801e-06, + "loss": 0.2792712450027466, + "step": 2225 + }, + { + "epoch": 1.099839367354504, + "grad_norm": 1.3277136329189279, + "learning_rate": 9.1879213192305e-06, + "loss": 0.29090794920921326, + "step": 2226 + }, + { + "epoch": 1.1003336216483381, + "grad_norm": 1.304360721279056, + "learning_rate": 9.179777985065115e-06, + "loss": 0.2777528762817383, + "step": 2227 + }, + { + "epoch": 1.1008278759421724, + "grad_norm": 1.1781995191131436, + "learning_rate": 9.171635198468227e-06, + "loss": 0.263868123292923, + "step": 2228 + }, + { + "epoch": 1.1013221302360063, + "grad_norm": 1.184942105326879, + "learning_rate": 9.16349296487584e-06, + "loss": 0.24118748307228088, + "step": 2229 + }, + { + "epoch": 1.1018163845298405, + "grad_norm": 1.2411255946822906, + "learning_rate": 9.155351289723603e-06, + "loss": 0.2176896631717682, + "step": 2230 + }, + { + "epoch": 1.1023106388236747, + "grad_norm": 1.3759218504425914, + "learning_rate": 9.147210178446776e-06, + "loss": 0.24727840721607208, + "step": 2231 + }, + { + "epoch": 1.102804893117509, + "grad_norm": 1.287783002848043, + "learning_rate": 9.139069636480247e-06, + "loss": 0.2711295783519745, + "step": 2232 + }, + { + "epoch": 1.1032991474113432, + "grad_norm": 1.2808604096079383, + "learning_rate": 
9.130929669258525e-06, + "loss": 0.2987736165523529, + "step": 2233 + }, + { + "epoch": 1.1037934017051774, + "grad_norm": 1.3771259989337001, + "learning_rate": 9.122790282215743e-06, + "loss": 0.2773835062980652, + "step": 2234 + }, + { + "epoch": 1.1042876559990116, + "grad_norm": 1.2299830744412572, + "learning_rate": 9.114651480785632e-06, + "loss": 0.29417523741722107, + "step": 2235 + }, + { + "epoch": 1.1047819102928456, + "grad_norm": 1.377692958442212, + "learning_rate": 9.106513270401545e-06, + "loss": 0.2642611265182495, + "step": 2236 + }, + { + "epoch": 1.1052761645866798, + "grad_norm": 1.2764125735134089, + "learning_rate": 9.098375656496434e-06, + "loss": 0.2789427638053894, + "step": 2237 + }, + { + "epoch": 1.105770418880514, + "grad_norm": 1.3238778744589295, + "learning_rate": 9.090238644502845e-06, + "loss": 0.3002237379550934, + "step": 2238 + }, + { + "epoch": 1.1062646731743482, + "grad_norm": 1.1862434874371655, + "learning_rate": 9.082102239852942e-06, + "loss": 0.27620676159858704, + "step": 2239 + }, + { + "epoch": 1.1067589274681824, + "grad_norm": 1.327009037228036, + "learning_rate": 9.07396644797847e-06, + "loss": 0.26718735694885254, + "step": 2240 + }, + { + "epoch": 1.1072531817620166, + "grad_norm": 1.3581828145326202, + "learning_rate": 9.065831274310763e-06, + "loss": 0.27443817257881165, + "step": 2241 + }, + { + "epoch": 1.1077474360558508, + "grad_norm": 1.2348189100714968, + "learning_rate": 9.057696724280748e-06, + "loss": 0.2536284923553467, + "step": 2242 + }, + { + "epoch": 1.108241690349685, + "grad_norm": 1.274876240899672, + "learning_rate": 9.049562803318942e-06, + "loss": 0.2583077549934387, + "step": 2243 + }, + { + "epoch": 1.108735944643519, + "grad_norm": 1.2591915779147578, + "learning_rate": 9.041429516855427e-06, + "loss": 0.2696278393268585, + "step": 2244 + }, + { + "epoch": 1.1092301989373532, + "grad_norm": 1.4248240108913692, + "learning_rate": 9.033296870319868e-06, + "loss": 0.2966364622116089, + 
"step": 2245 + }, + { + "epoch": 1.1097244532311874, + "grad_norm": 1.1050822330716321, + "learning_rate": 9.025164869141503e-06, + "loss": 0.22690679132938385, + "step": 2246 + }, + { + "epoch": 1.1102187075250216, + "grad_norm": 1.192560579016723, + "learning_rate": 9.017033518749147e-06, + "loss": 0.2777915894985199, + "step": 2247 + }, + { + "epoch": 1.1107129618188558, + "grad_norm": 1.3394858504136318, + "learning_rate": 9.008902824571168e-06, + "loss": 0.2890303134918213, + "step": 2248 + }, + { + "epoch": 1.11120721611269, + "grad_norm": 1.0426463189164805, + "learning_rate": 9.000772792035505e-06, + "loss": 0.22669392824172974, + "step": 2249 + }, + { + "epoch": 1.1117014704065242, + "grad_norm": 1.1970809485558533, + "learning_rate": 8.992643426569643e-06, + "loss": 0.26416563987731934, + "step": 2250 + }, + { + "epoch": 1.1121957247003584, + "grad_norm": 1.1888202892832207, + "learning_rate": 8.984514733600641e-06, + "loss": 0.2745298147201538, + "step": 2251 + }, + { + "epoch": 1.1126899789941924, + "grad_norm": 1.3798693264357922, + "learning_rate": 8.97638671855509e-06, + "loss": 0.31175684928894043, + "step": 2252 + }, + { + "epoch": 1.1131842332880266, + "grad_norm": 1.1626887122886307, + "learning_rate": 8.968259386859146e-06, + "loss": 0.2632657289505005, + "step": 2253 + }, + { + "epoch": 1.1136784875818608, + "grad_norm": 1.810662888324155, + "learning_rate": 8.960132743938485e-06, + "loss": 0.25820252299308777, + "step": 2254 + }, + { + "epoch": 1.114172741875695, + "grad_norm": 1.061521514088085, + "learning_rate": 8.95200679521835e-06, + "loss": 0.24255456030368805, + "step": 2255 + }, + { + "epoch": 1.1146669961695292, + "grad_norm": 1.2696759740581753, + "learning_rate": 8.943881546123506e-06, + "loss": 0.2973442077636719, + "step": 2256 + }, + { + "epoch": 1.1151612504633635, + "grad_norm": 1.1336353694819978, + "learning_rate": 8.935757002078252e-06, + "loss": 0.23320606350898743, + "step": 2257 + }, + { + "epoch": 1.1156555047571977, + 
"grad_norm": 1.275444057796017, + "learning_rate": 8.927633168506415e-06, + "loss": 0.2923268675804138, + "step": 2258 + }, + { + "epoch": 1.1161497590510319, + "grad_norm": 1.25496425665649, + "learning_rate": 8.91951005083135e-06, + "loss": 0.25932425260543823, + "step": 2259 + }, + { + "epoch": 1.1166440133448658, + "grad_norm": 1.2215943645090854, + "learning_rate": 8.911387654475943e-06, + "loss": 0.2631821036338806, + "step": 2260 + }, + { + "epoch": 1.1171382676387, + "grad_norm": 1.226020936236602, + "learning_rate": 8.903265984862581e-06, + "loss": 0.24741420149803162, + "step": 2261 + }, + { + "epoch": 1.1176325219325343, + "grad_norm": 1.165036984102613, + "learning_rate": 8.895145047413178e-06, + "loss": 0.2593516707420349, + "step": 2262 + }, + { + "epoch": 1.1181267762263685, + "grad_norm": 1.2132388690590856, + "learning_rate": 8.88702484754915e-06, + "loss": 0.22109609842300415, + "step": 2263 + }, + { + "epoch": 1.1186210305202027, + "grad_norm": 1.242512673005374, + "learning_rate": 8.878905390691437e-06, + "loss": 0.24363039433956146, + "step": 2264 + }, + { + "epoch": 1.1191152848140369, + "grad_norm": 1.210365574835302, + "learning_rate": 8.870786682260465e-06, + "loss": 0.2507505714893341, + "step": 2265 + }, + { + "epoch": 1.119609539107871, + "grad_norm": 1.3229609964254254, + "learning_rate": 8.86266872767617e-06, + "loss": 0.303046315908432, + "step": 2266 + }, + { + "epoch": 1.120103793401705, + "grad_norm": 1.282548473383847, + "learning_rate": 8.854551532357977e-06, + "loss": 0.257943332195282, + "step": 2267 + }, + { + "epoch": 1.1205980476955393, + "grad_norm": 1.2641740973335522, + "learning_rate": 8.84643510172482e-06, + "loss": 0.2697421610355377, + "step": 2268 + }, + { + "epoch": 1.1210923019893735, + "grad_norm": 1.126371134669409, + "learning_rate": 8.838319441195105e-06, + "loss": 0.20090234279632568, + "step": 2269 + }, + { + "epoch": 1.1215865562832077, + "grad_norm": 1.3584193930662543, + "learning_rate": 
8.830204556186736e-06, + "loss": 0.2714189887046814, + "step": 2270 + }, + { + "epoch": 1.122080810577042, + "grad_norm": 1.1168786328747864, + "learning_rate": 8.822090452117084e-06, + "loss": 0.23497477173805237, + "step": 2271 + }, + { + "epoch": 1.122575064870876, + "grad_norm": 1.3047944688196833, + "learning_rate": 8.81397713440302e-06, + "loss": 0.2582445740699768, + "step": 2272 + }, + { + "epoch": 1.1230693191647103, + "grad_norm": 1.2807794267280126, + "learning_rate": 8.805864608460876e-06, + "loss": 0.26494619250297546, + "step": 2273 + }, + { + "epoch": 1.1235635734585445, + "grad_norm": 1.3251515621500554, + "learning_rate": 8.797752879706455e-06, + "loss": 0.2767868936061859, + "step": 2274 + }, + { + "epoch": 1.1240578277523785, + "grad_norm": 1.5161646380346314, + "learning_rate": 8.789641953555032e-06, + "loss": 0.27696311473846436, + "step": 2275 + }, + { + "epoch": 1.1245520820462127, + "grad_norm": 1.3659389136687503, + "learning_rate": 8.78153183542135e-06, + "loss": 0.27048689126968384, + "step": 2276 + }, + { + "epoch": 1.125046336340047, + "grad_norm": 1.3893625373049876, + "learning_rate": 8.773422530719606e-06, + "loss": 0.2940211892127991, + "step": 2277 + }, + { + "epoch": 1.1255405906338811, + "grad_norm": 1.310212206650707, + "learning_rate": 8.765314044863453e-06, + "loss": 0.24859851598739624, + "step": 2278 + }, + { + "epoch": 1.1260348449277153, + "grad_norm": 1.3087530353150083, + "learning_rate": 8.757206383265998e-06, + "loss": 0.28879350423812866, + "step": 2279 + }, + { + "epoch": 1.1265290992215495, + "grad_norm": 1.2514534154786532, + "learning_rate": 8.74909955133981e-06, + "loss": 0.24804209172725677, + "step": 2280 + }, + { + "epoch": 1.1270233535153837, + "grad_norm": 1.3358056447173947, + "learning_rate": 8.740993554496886e-06, + "loss": 0.3199496567249298, + "step": 2281 + }, + { + "epoch": 1.1275176078092177, + "grad_norm": 2.15705729620974, + "learning_rate": 8.732888398148678e-06, + "loss": 0.3098929524421692, + 
"step": 2282 + }, + { + "epoch": 1.128011862103052, + "grad_norm": 1.2048730778866592, + "learning_rate": 8.724784087706067e-06, + "loss": 0.21280749142169952, + "step": 2283 + }, + { + "epoch": 1.1285061163968861, + "grad_norm": 1.1819530781050969, + "learning_rate": 8.716680628579382e-06, + "loss": 0.25330856442451477, + "step": 2284 + }, + { + "epoch": 1.1290003706907203, + "grad_norm": 1.2218083349938962, + "learning_rate": 8.708578026178371e-06, + "loss": 0.26141977310180664, + "step": 2285 + }, + { + "epoch": 1.1294946249845546, + "grad_norm": 1.3085311775335164, + "learning_rate": 8.700476285912219e-06, + "loss": 0.2529010772705078, + "step": 2286 + }, + { + "epoch": 1.1299888792783888, + "grad_norm": 1.4496496993285695, + "learning_rate": 8.69237541318953e-06, + "loss": 0.2662504315376282, + "step": 2287 + }, + { + "epoch": 1.130483133572223, + "grad_norm": 1.2797233255982605, + "learning_rate": 8.684275413418329e-06, + "loss": 0.2724575996398926, + "step": 2288 + }, + { + "epoch": 1.1309773878660572, + "grad_norm": 1.2524016016810007, + "learning_rate": 8.676176292006065e-06, + "loss": 0.2820962965488434, + "step": 2289 + }, + { + "epoch": 1.1314716421598914, + "grad_norm": 1.2157522787611978, + "learning_rate": 8.668078054359595e-06, + "loss": 0.2594743072986603, + "step": 2290 + }, + { + "epoch": 1.1319658964537254, + "grad_norm": 1.1017631552140204, + "learning_rate": 8.659980705885183e-06, + "loss": 0.25397709012031555, + "step": 2291 + }, + { + "epoch": 1.1324601507475596, + "grad_norm": 1.3505914192645034, + "learning_rate": 8.651884251988503e-06, + "loss": 0.27261337637901306, + "step": 2292 + }, + { + "epoch": 1.1329544050413938, + "grad_norm": 1.191460472235454, + "learning_rate": 8.643788698074638e-06, + "loss": 0.2726992070674896, + "step": 2293 + }, + { + "epoch": 1.133448659335228, + "grad_norm": 1.2175895117879216, + "learning_rate": 8.635694049548058e-06, + "loss": 0.2792774438858032, + "step": 2294 + }, + { + "epoch": 1.1339429136290622, + 
"grad_norm": 1.272860546351146, + "learning_rate": 8.627600311812638e-06, + "loss": 0.310885488986969, + "step": 2295 + }, + { + "epoch": 1.1344371679228964, + "grad_norm": 1.2747295027163217, + "learning_rate": 8.619507490271638e-06, + "loss": 0.27060413360595703, + "step": 2296 + }, + { + "epoch": 1.1349314222167306, + "grad_norm": 1.2507140444567972, + "learning_rate": 8.611415590327718e-06, + "loss": 0.27069440484046936, + "step": 2297 + }, + { + "epoch": 1.1354256765105646, + "grad_norm": 1.2299186955801236, + "learning_rate": 8.603324617382905e-06, + "loss": 0.2790459990501404, + "step": 2298 + }, + { + "epoch": 1.1359199308043988, + "grad_norm": 1.2813816772493964, + "learning_rate": 8.595234576838624e-06, + "loss": 0.27170947194099426, + "step": 2299 + }, + { + "epoch": 1.136414185098233, + "grad_norm": 1.1903279302585759, + "learning_rate": 8.587145474095665e-06, + "loss": 0.25313863158226013, + "step": 2300 + }, + { + "epoch": 1.1369084393920672, + "grad_norm": 1.2968469055543796, + "learning_rate": 8.5790573145542e-06, + "loss": 0.289467990398407, + "step": 2301 + }, + { + "epoch": 1.1374026936859014, + "grad_norm": 1.3141096348522086, + "learning_rate": 8.570970103613774e-06, + "loss": 0.29796460270881653, + "step": 2302 + }, + { + "epoch": 1.1378969479797356, + "grad_norm": 1.2855551342619271, + "learning_rate": 8.562883846673286e-06, + "loss": 0.27264270186424255, + "step": 2303 + }, + { + "epoch": 1.1383912022735698, + "grad_norm": 1.2243974310235655, + "learning_rate": 8.554798549131005e-06, + "loss": 0.3099757134914398, + "step": 2304 + }, + { + "epoch": 1.138885456567404, + "grad_norm": 1.2936181628424743, + "learning_rate": 8.546714216384565e-06, + "loss": 0.30002498626708984, + "step": 2305 + }, + { + "epoch": 1.139379710861238, + "grad_norm": 1.7617864884936485, + "learning_rate": 8.538630853830951e-06, + "loss": 0.2428818643093109, + "step": 2306 + }, + { + "epoch": 1.1398739651550722, + "grad_norm": 1.24686983002664, + "learning_rate": 
8.530548466866497e-06, + "loss": 0.2601294219493866, + "step": 2307 + }, + { + "epoch": 1.1403682194489064, + "grad_norm": 1.2066765531591284, + "learning_rate": 8.522467060886888e-06, + "loss": 0.23878628015518188, + "step": 2308 + }, + { + "epoch": 1.1408624737427406, + "grad_norm": 1.345733709932402, + "learning_rate": 8.514386641287163e-06, + "loss": 0.2780643403530121, + "step": 2309 + }, + { + "epoch": 1.1413567280365748, + "grad_norm": 1.2756115099724787, + "learning_rate": 8.506307213461689e-06, + "loss": 0.29834824800491333, + "step": 2310 + }, + { + "epoch": 1.141850982330409, + "grad_norm": 1.3376095615389103, + "learning_rate": 8.498228782804175e-06, + "loss": 0.2733996510505676, + "step": 2311 + }, + { + "epoch": 1.1423452366242433, + "grad_norm": 1.3063802509871558, + "learning_rate": 8.490151354707669e-06, + "loss": 0.2524843215942383, + "step": 2312 + }, + { + "epoch": 1.1428394909180772, + "grad_norm": 1.2776723106689647, + "learning_rate": 8.482074934564543e-06, + "loss": 0.29077857732772827, + "step": 2313 + }, + { + "epoch": 1.1433337452119114, + "grad_norm": 1.2114776729729342, + "learning_rate": 8.473999527766503e-06, + "loss": 0.25935155153274536, + "step": 2314 + }, + { + "epoch": 1.1438279995057457, + "grad_norm": 1.3166365920869918, + "learning_rate": 8.465925139704578e-06, + "loss": 0.23595012724399567, + "step": 2315 + }, + { + "epoch": 1.1443222537995799, + "grad_norm": 1.2268504419293456, + "learning_rate": 8.457851775769108e-06, + "loss": 0.25193360447883606, + "step": 2316 + }, + { + "epoch": 1.144816508093414, + "grad_norm": 1.2847886622034916, + "learning_rate": 8.449779441349755e-06, + "loss": 0.26844412088394165, + "step": 2317 + }, + { + "epoch": 1.1453107623872483, + "grad_norm": 1.2550831674884213, + "learning_rate": 8.441708141835499e-06, + "loss": 0.2507320046424866, + "step": 2318 + }, + { + "epoch": 1.1458050166810825, + "grad_norm": 1.31186920690482, + "learning_rate": 8.433637882614624e-06, + "loss": 0.2756047248840332, 
+ "step": 2319 + }, + { + "epoch": 1.1462992709749167, + "grad_norm": 1.3818376930568548, + "learning_rate": 8.425568669074717e-06, + "loss": 0.3136482536792755, + "step": 2320 + }, + { + "epoch": 1.146793525268751, + "grad_norm": 1.3094285230006764, + "learning_rate": 8.417500506602668e-06, + "loss": 0.25975438952445984, + "step": 2321 + }, + { + "epoch": 1.1472877795625849, + "grad_norm": 1.3148310008881885, + "learning_rate": 8.409433400584674e-06, + "loss": 0.2524915039539337, + "step": 2322 + }, + { + "epoch": 1.147782033856419, + "grad_norm": 1.316055955366049, + "learning_rate": 8.401367356406214e-06, + "loss": 0.2731180787086487, + "step": 2323 + }, + { + "epoch": 1.1482762881502533, + "grad_norm": 1.4277670811350172, + "learning_rate": 8.393302379452065e-06, + "loss": 0.27752095460891724, + "step": 2324 + }, + { + "epoch": 1.1487705424440875, + "grad_norm": 1.2586766809004215, + "learning_rate": 8.385238475106287e-06, + "loss": 0.269240140914917, + "step": 2325 + }, + { + "epoch": 1.1492647967379217, + "grad_norm": 1.301058586916402, + "learning_rate": 8.377175648752236e-06, + "loss": 0.2668418288230896, + "step": 2326 + }, + { + "epoch": 1.149759051031756, + "grad_norm": 1.2869179599070777, + "learning_rate": 8.369113905772532e-06, + "loss": 0.29276758432388306, + "step": 2327 + }, + { + "epoch": 1.15025330532559, + "grad_norm": 1.240170388592341, + "learning_rate": 8.361053251549083e-06, + "loss": 0.26562872529029846, + "step": 2328 + }, + { + "epoch": 1.150747559619424, + "grad_norm": 1.2907483203574122, + "learning_rate": 8.352993691463063e-06, + "loss": 0.257779061794281, + "step": 2329 + }, + { + "epoch": 1.1512418139132583, + "grad_norm": 1.3761256870332743, + "learning_rate": 8.344935230894926e-06, + "loss": 0.2871868312358856, + "step": 2330 + }, + { + "epoch": 1.1517360682070925, + "grad_norm": 1.2766304490065612, + "learning_rate": 8.336877875224379e-06, + "loss": 0.25191348791122437, + "step": 2331 + }, + { + "epoch": 1.1522303225009267, + 
"grad_norm": 1.1532415542893881, + "learning_rate": 8.3288216298304e-06, + "loss": 0.27057239413261414, + "step": 2332 + }, + { + "epoch": 1.152724576794761, + "grad_norm": 1.3903855220327628, + "learning_rate": 8.32076650009122e-06, + "loss": 0.31574326753616333, + "step": 2333 + }, + { + "epoch": 1.1532188310885951, + "grad_norm": 1.5549371484345924, + "learning_rate": 8.312712491384332e-06, + "loss": 0.22503693401813507, + "step": 2334 + }, + { + "epoch": 1.1537130853824293, + "grad_norm": 1.2363735263099107, + "learning_rate": 8.304659609086478e-06, + "loss": 0.25754863023757935, + "step": 2335 + }, + { + "epoch": 1.1542073396762635, + "grad_norm": 1.1790773293013888, + "learning_rate": 8.296607858573646e-06, + "loss": 0.24367934465408325, + "step": 2336 + }, + { + "epoch": 1.1547015939700975, + "grad_norm": 1.2792693896599328, + "learning_rate": 8.288557245221068e-06, + "loss": 0.28907084465026855, + "step": 2337 + }, + { + "epoch": 1.1551958482639317, + "grad_norm": 1.1852966795691644, + "learning_rate": 8.280507774403217e-06, + "loss": 0.24526283144950867, + "step": 2338 + }, + { + "epoch": 1.155690102557766, + "grad_norm": 1.1473094958169556, + "learning_rate": 8.272459451493811e-06, + "loss": 0.21968787908554077, + "step": 2339 + }, + { + "epoch": 1.1561843568516001, + "grad_norm": 1.1613080622383485, + "learning_rate": 8.264412281865791e-06, + "loss": 0.23803061246871948, + "step": 2340 + }, + { + "epoch": 1.1566786111454344, + "grad_norm": 1.2818425210270699, + "learning_rate": 8.256366270891335e-06, + "loss": 0.25715917348861694, + "step": 2341 + }, + { + "epoch": 1.1571728654392686, + "grad_norm": 1.300939575113673, + "learning_rate": 8.248321423941836e-06, + "loss": 0.29443520307540894, + "step": 2342 + }, + { + "epoch": 1.1576671197331028, + "grad_norm": 1.2224332053171705, + "learning_rate": 8.240277746387934e-06, + "loss": 0.24904949963092804, + "step": 2343 + }, + { + "epoch": 1.1581613740269368, + "grad_norm": 1.2866663921835886, + 
"learning_rate": 8.23223524359946e-06, + "loss": 0.2594628632068634, + "step": 2344 + }, + { + "epoch": 1.158655628320771, + "grad_norm": 1.2731058113968243, + "learning_rate": 8.224193920945482e-06, + "loss": 0.23853302001953125, + "step": 2345 + }, + { + "epoch": 1.1591498826146052, + "grad_norm": 1.3394742959570003, + "learning_rate": 8.216153783794266e-06, + "loss": 0.25465112924575806, + "step": 2346 + }, + { + "epoch": 1.1596441369084394, + "grad_norm": 1.3135301213887383, + "learning_rate": 8.208114837513297e-06, + "loss": 0.28038230538368225, + "step": 2347 + }, + { + "epoch": 1.1601383912022736, + "grad_norm": 1.350685866794537, + "learning_rate": 8.200077087469262e-06, + "loss": 0.3144591450691223, + "step": 2348 + }, + { + "epoch": 1.1606326454961078, + "grad_norm": 1.281224607522297, + "learning_rate": 8.192040539028047e-06, + "loss": 0.25782787799835205, + "step": 2349 + }, + { + "epoch": 1.161126899789942, + "grad_norm": 1.5124699254380607, + "learning_rate": 8.18400519755473e-06, + "loss": 0.21928566694259644, + "step": 2350 + }, + { + "epoch": 1.1616211540837762, + "grad_norm": 1.2617101773123074, + "learning_rate": 8.175971068413598e-06, + "loss": 0.2277221381664276, + "step": 2351 + }, + { + "epoch": 1.1621154083776104, + "grad_norm": 1.3465952359588251, + "learning_rate": 8.16793815696812e-06, + "loss": 0.26971378922462463, + "step": 2352 + }, + { + "epoch": 1.1626096626714444, + "grad_norm": 1.352802202139023, + "learning_rate": 8.15990646858095e-06, + "loss": 0.26448535919189453, + "step": 2353 + }, + { + "epoch": 1.1631039169652786, + "grad_norm": 1.3091049684475664, + "learning_rate": 8.151876008613927e-06, + "loss": 0.26372095942497253, + "step": 2354 + }, + { + "epoch": 1.1635981712591128, + "grad_norm": 1.3450938198850664, + "learning_rate": 8.143846782428078e-06, + "loss": 0.2594243288040161, + "step": 2355 + }, + { + "epoch": 1.164092425552947, + "grad_norm": 1.2377171543356333, + "learning_rate": 8.135818795383597e-06, + "loss": 
0.23994986712932587, + "step": 2356 + }, + { + "epoch": 1.1645866798467812, + "grad_norm": 1.2983017697862052, + "learning_rate": 8.12779205283985e-06, + "loss": 0.2746032476425171, + "step": 2357 + }, + { + "epoch": 1.1650809341406154, + "grad_norm": 1.3938993958898265, + "learning_rate": 8.119766560155377e-06, + "loss": 0.3323846161365509, + "step": 2358 + }, + { + "epoch": 1.1655751884344494, + "grad_norm": 1.3890076094482564, + "learning_rate": 8.111742322687886e-06, + "loss": 0.28155508637428284, + "step": 2359 + }, + { + "epoch": 1.1660694427282836, + "grad_norm": 1.361844276882708, + "learning_rate": 8.103719345794237e-06, + "loss": 0.2936748266220093, + "step": 2360 + }, + { + "epoch": 1.1665636970221178, + "grad_norm": 1.2168650482731003, + "learning_rate": 8.095697634830463e-06, + "loss": 0.23575282096862793, + "step": 2361 + }, + { + "epoch": 1.167057951315952, + "grad_norm": 1.277845029620416, + "learning_rate": 8.087677195151737e-06, + "loss": 0.24547496438026428, + "step": 2362 + }, + { + "epoch": 1.1675522056097862, + "grad_norm": 1.3371291006512767, + "learning_rate": 8.079658032112388e-06, + "loss": 0.2936372458934784, + "step": 2363 + }, + { + "epoch": 1.1680464599036204, + "grad_norm": 1.316297337509115, + "learning_rate": 8.071640151065902e-06, + "loss": 0.28602418303489685, + "step": 2364 + }, + { + "epoch": 1.1685407141974546, + "grad_norm": 1.271542457187923, + "learning_rate": 8.0636235573649e-06, + "loss": 0.2742761969566345, + "step": 2365 + }, + { + "epoch": 1.1690349684912889, + "grad_norm": 1.2379702024007857, + "learning_rate": 8.05560825636114e-06, + "loss": 0.2590268552303314, + "step": 2366 + }, + { + "epoch": 1.169529222785123, + "grad_norm": 1.2195835846594238, + "learning_rate": 8.047594253405525e-06, + "loss": 0.26881399750709534, + "step": 2367 + }, + { + "epoch": 1.170023477078957, + "grad_norm": 1.279205613064969, + "learning_rate": 8.039581553848093e-06, + "loss": 0.27069953083992004, + "step": 2368 + }, + { + "epoch": 
1.1705177313727912, + "grad_norm": 1.1650094541250327, + "learning_rate": 8.031570163038005e-06, + "loss": 0.27320611476898193, + "step": 2369 + }, + { + "epoch": 1.1710119856666255, + "grad_norm": 1.289507742767465, + "learning_rate": 8.023560086323548e-06, + "loss": 0.26400327682495117, + "step": 2370 + }, + { + "epoch": 1.1715062399604597, + "grad_norm": 1.1403608861276666, + "learning_rate": 8.015551329052136e-06, + "loss": 0.22287744283676147, + "step": 2371 + }, + { + "epoch": 1.1720004942542939, + "grad_norm": 1.2409841787965832, + "learning_rate": 8.007543896570309e-06, + "loss": 0.28240424394607544, + "step": 2372 + }, + { + "epoch": 1.172494748548128, + "grad_norm": 1.3414402473623117, + "learning_rate": 7.999537794223702e-06, + "loss": 0.27119147777557373, + "step": 2373 + }, + { + "epoch": 1.1729890028419623, + "grad_norm": 1.376418134177551, + "learning_rate": 7.991533027357085e-06, + "loss": 0.2579900920391083, + "step": 2374 + }, + { + "epoch": 1.1734832571357963, + "grad_norm": 1.197547817498857, + "learning_rate": 7.983529601314317e-06, + "loss": 0.25550374388694763, + "step": 2375 + }, + { + "epoch": 1.1739775114296305, + "grad_norm": 1.119102387270249, + "learning_rate": 7.97552752143838e-06, + "loss": 0.21197429299354553, + "step": 2376 + }, + { + "epoch": 1.1744717657234647, + "grad_norm": 1.245296460371477, + "learning_rate": 7.96752679307134e-06, + "loss": 0.28724029660224915, + "step": 2377 + }, + { + "epoch": 1.1749660200172989, + "grad_norm": 1.119081251981291, + "learning_rate": 7.959527421554375e-06, + "loss": 0.24320468306541443, + "step": 2378 + }, + { + "epoch": 1.175460274311133, + "grad_norm": 1.1094352642608503, + "learning_rate": 7.951529412227745e-06, + "loss": 0.22487501800060272, + "step": 2379 + }, + { + "epoch": 1.1759545286049673, + "grad_norm": 1.1424975538486684, + "learning_rate": 7.943532770430811e-06, + "loss": 0.2754969894886017, + "step": 2380 + }, + { + "epoch": 1.1764487828988015, + "grad_norm": 1.2424832323819373, 
+ "learning_rate": 7.93553750150202e-06, + "loss": 0.2734825909137726, + "step": 2381 + }, + { + "epoch": 1.1769430371926357, + "grad_norm": 1.3311172796502668, + "learning_rate": 7.927543610778895e-06, + "loss": 0.2803332209587097, + "step": 2382 + }, + { + "epoch": 1.1774372914864697, + "grad_norm": 1.3572589379934268, + "learning_rate": 7.919551103598037e-06, + "loss": 0.2820316255092621, + "step": 2383 + }, + { + "epoch": 1.177931545780304, + "grad_norm": 1.1984541262238777, + "learning_rate": 7.911559985295142e-06, + "loss": 0.26788315176963806, + "step": 2384 + }, + { + "epoch": 1.178425800074138, + "grad_norm": 1.152974420484647, + "learning_rate": 7.90357026120496e-06, + "loss": 0.2562825083732605, + "step": 2385 + }, + { + "epoch": 1.1789200543679723, + "grad_norm": 1.3733272776027918, + "learning_rate": 7.895581936661316e-06, + "loss": 0.28260675072669983, + "step": 2386 + }, + { + "epoch": 1.1794143086618065, + "grad_norm": 1.2509507258139472, + "learning_rate": 7.887595016997105e-06, + "loss": 0.25887200236320496, + "step": 2387 + }, + { + "epoch": 1.1799085629556407, + "grad_norm": 1.1852436756934879, + "learning_rate": 7.879609507544274e-06, + "loss": 0.2351648062467575, + "step": 2388 + }, + { + "epoch": 1.180402817249475, + "grad_norm": 1.310528017980178, + "learning_rate": 7.871625413633843e-06, + "loss": 0.2958889305591583, + "step": 2389 + }, + { + "epoch": 1.180897071543309, + "grad_norm": 1.260660594043313, + "learning_rate": 7.863642740595873e-06, + "loss": 0.29704710841178894, + "step": 2390 + }, + { + "epoch": 1.1813913258371431, + "grad_norm": 1.1273593973839822, + "learning_rate": 7.855661493759488e-06, + "loss": 0.23283210396766663, + "step": 2391 + }, + { + "epoch": 1.1818855801309773, + "grad_norm": 1.1497387573049556, + "learning_rate": 7.847681678452846e-06, + "loss": 0.22818870842456818, + "step": 2392 + }, + { + "epoch": 1.1823798344248115, + "grad_norm": 1.2334848445567106, + "learning_rate": 7.839703300003163e-06, + "loss": 
0.2345077246427536, + "step": 2393 + }, + { + "epoch": 1.1828740887186457, + "grad_norm": 1.3979127898652413, + "learning_rate": 7.831726363736694e-06, + "loss": 0.31161409616470337, + "step": 2394 + }, + { + "epoch": 1.18336834301248, + "grad_norm": 1.3157666615230723, + "learning_rate": 7.823750874978724e-06, + "loss": 0.2958439588546753, + "step": 2395 + }, + { + "epoch": 1.1838625973063142, + "grad_norm": 1.1914805532137183, + "learning_rate": 7.815776839053568e-06, + "loss": 0.24895446002483368, + "step": 2396 + }, + { + "epoch": 1.1843568516001484, + "grad_norm": 1.189611866561264, + "learning_rate": 7.807804261284591e-06, + "loss": 0.2691795825958252, + "step": 2397 + }, + { + "epoch": 1.1848511058939826, + "grad_norm": 1.2282823509277643, + "learning_rate": 7.799833146994165e-06, + "loss": 0.26797783374786377, + "step": 2398 + }, + { + "epoch": 1.1853453601878166, + "grad_norm": 1.2297499766268158, + "learning_rate": 7.791863501503694e-06, + "loss": 0.2665610611438751, + "step": 2399 + }, + { + "epoch": 1.1858396144816508, + "grad_norm": 1.1290863581864232, + "learning_rate": 7.783895330133596e-06, + "loss": 0.24712792038917542, + "step": 2400 + }, + { + "epoch": 1.186333868775485, + "grad_norm": 1.2300895404986125, + "learning_rate": 7.775928638203316e-06, + "loss": 0.24131645262241364, + "step": 2401 + }, + { + "epoch": 1.1868281230693192, + "grad_norm": 1.2566198414342145, + "learning_rate": 7.7679634310313e-06, + "loss": 0.24233923852443695, + "step": 2402 + }, + { + "epoch": 1.1873223773631534, + "grad_norm": 1.2397915401139883, + "learning_rate": 7.759999713935002e-06, + "loss": 0.24929150938987732, + "step": 2403 + }, + { + "epoch": 1.1878166316569876, + "grad_norm": 1.2005274695814647, + "learning_rate": 7.752037492230887e-06, + "loss": 0.266767293214798, + "step": 2404 + }, + { + "epoch": 1.1883108859508218, + "grad_norm": 1.2083997342227277, + "learning_rate": 7.744076771234427e-06, + "loss": 0.257263720035553, + "step": 2405 + }, + { + "epoch": 
1.1888051402446558, + "grad_norm": 1.3017758985808945, + "learning_rate": 7.73611755626008e-06, + "loss": 0.26949891448020935, + "step": 2406 + }, + { + "epoch": 1.18929939453849, + "grad_norm": 1.3523825920294412, + "learning_rate": 7.728159852621308e-06, + "loss": 0.250274121761322, + "step": 2407 + }, + { + "epoch": 1.1897936488323242, + "grad_norm": 1.6370645689880403, + "learning_rate": 7.720203665630553e-06, + "loss": 0.2442864030599594, + "step": 2408 + }, + { + "epoch": 1.1902879031261584, + "grad_norm": 1.4258170868908235, + "learning_rate": 7.71224900059926e-06, + "loss": 0.273416131734848, + "step": 2409 + }, + { + "epoch": 1.1907821574199926, + "grad_norm": 1.2547538223250059, + "learning_rate": 7.704295862837845e-06, + "loss": 0.2559645175933838, + "step": 2410 + }, + { + "epoch": 1.1912764117138268, + "grad_norm": 1.3439078919148493, + "learning_rate": 7.696344257655713e-06, + "loss": 0.2793371379375458, + "step": 2411 + }, + { + "epoch": 1.191770666007661, + "grad_norm": 1.1661216324600743, + "learning_rate": 7.688394190361235e-06, + "loss": 0.23739437758922577, + "step": 2412 + }, + { + "epoch": 1.1922649203014952, + "grad_norm": 1.293132062594429, + "learning_rate": 7.680445666261766e-06, + "loss": 0.27027466893196106, + "step": 2413 + }, + { + "epoch": 1.1927591745953292, + "grad_norm": 1.2887121644516222, + "learning_rate": 7.672498690663632e-06, + "loss": 0.2641778886318207, + "step": 2414 + }, + { + "epoch": 1.1932534288891634, + "grad_norm": 1.235898023301149, + "learning_rate": 7.664553268872116e-06, + "loss": 0.25086820125579834, + "step": 2415 + }, + { + "epoch": 1.1937476831829976, + "grad_norm": 1.6761712741491541, + "learning_rate": 7.656609406191467e-06, + "loss": 0.2871254086494446, + "step": 2416 + }, + { + "epoch": 1.1942419374768318, + "grad_norm": 1.193500770631568, + "learning_rate": 7.648667107924893e-06, + "loss": 0.2657528221607208, + "step": 2417 + }, + { + "epoch": 1.194736191770666, + "grad_norm": 1.3739698225148846, + 
"learning_rate": 7.640726379374564e-06, + "loss": 0.26942694187164307, + "step": 2418 + }, + { + "epoch": 1.1952304460645002, + "grad_norm": 1.1561137180130854, + "learning_rate": 7.632787225841593e-06, + "loss": 0.23883840441703796, + "step": 2419 + }, + { + "epoch": 1.1957247003583344, + "grad_norm": 1.215726770348901, + "learning_rate": 7.624849652626049e-06, + "loss": 0.24837304651737213, + "step": 2420 + }, + { + "epoch": 1.1962189546521684, + "grad_norm": 1.194954932679119, + "learning_rate": 7.616913665026936e-06, + "loss": 0.2882450222969055, + "step": 2421 + }, + { + "epoch": 1.1967132089460026, + "grad_norm": 1.4557191034476904, + "learning_rate": 7.608979268342213e-06, + "loss": 0.25877460837364197, + "step": 2422 + }, + { + "epoch": 1.1972074632398368, + "grad_norm": 1.2343724838571453, + "learning_rate": 7.601046467868767e-06, + "loss": 0.26970750093460083, + "step": 2423 + }, + { + "epoch": 1.197701717533671, + "grad_norm": 1.1598747816375319, + "learning_rate": 7.593115268902423e-06, + "loss": 0.23771706223487854, + "step": 2424 + }, + { + "epoch": 1.1981959718275053, + "grad_norm": 1.1949187968831856, + "learning_rate": 7.585185676737932e-06, + "loss": 0.25420787930488586, + "step": 2425 + }, + { + "epoch": 1.1986902261213395, + "grad_norm": 1.248194263596005, + "learning_rate": 7.577257696668982e-06, + "loss": 0.2551025152206421, + "step": 2426 + }, + { + "epoch": 1.1991844804151737, + "grad_norm": 1.1913659485965633, + "learning_rate": 7.569331333988177e-06, + "loss": 0.2302972972393036, + "step": 2427 + }, + { + "epoch": 1.1996787347090079, + "grad_norm": 1.340176223566515, + "learning_rate": 7.561406593987045e-06, + "loss": 0.25811445713043213, + "step": 2428 + }, + { + "epoch": 1.200172989002842, + "grad_norm": 1.1946803554276415, + "learning_rate": 7.5534834819560235e-06, + "loss": 0.2550782561302185, + "step": 2429 + }, + { + "epoch": 1.200667243296676, + "grad_norm": 1.3588122473637638, + "learning_rate": 7.545562003184474e-06, + "loss": 
0.24825535714626312, + "step": 2430 + }, + { + "epoch": 1.2011614975905103, + "grad_norm": 1.3105140055807547, + "learning_rate": 7.537642162960664e-06, + "loss": 0.29703712463378906, + "step": 2431 + }, + { + "epoch": 1.2016557518843445, + "grad_norm": 1.2707072551305245, + "learning_rate": 7.5297239665717625e-06, + "loss": 0.26830747723579407, + "step": 2432 + }, + { + "epoch": 1.2021500061781787, + "grad_norm": 1.2272388404108225, + "learning_rate": 7.521807419303846e-06, + "loss": 0.2428341656923294, + "step": 2433 + }, + { + "epoch": 1.202644260472013, + "grad_norm": 1.3310573803274635, + "learning_rate": 7.513892526441883e-06, + "loss": 0.2843051552772522, + "step": 2434 + }, + { + "epoch": 1.203138514765847, + "grad_norm": 1.297091941411815, + "learning_rate": 7.50597929326975e-06, + "loss": 0.2485228031873703, + "step": 2435 + }, + { + "epoch": 1.203632769059681, + "grad_norm": 1.3716686006321661, + "learning_rate": 7.498067725070206e-06, + "loss": 0.25343626737594604, + "step": 2436 + }, + { + "epoch": 1.2041270233535153, + "grad_norm": 1.3197919626781558, + "learning_rate": 7.490157827124902e-06, + "loss": 0.24906575679779053, + "step": 2437 + }, + { + "epoch": 1.2046212776473495, + "grad_norm": 1.6398204697926184, + "learning_rate": 7.4822496047143665e-06, + "loss": 0.33576443791389465, + "step": 2438 + }, + { + "epoch": 1.2051155319411837, + "grad_norm": 1.341601959864184, + "learning_rate": 7.474343063118023e-06, + "loss": 0.2755683362483978, + "step": 2439 + }, + { + "epoch": 1.205609786235018, + "grad_norm": 1.259839098151577, + "learning_rate": 7.466438207614165e-06, + "loss": 0.2667745351791382, + "step": 2440 + }, + { + "epoch": 1.2061040405288521, + "grad_norm": 1.3942381323272646, + "learning_rate": 7.458535043479959e-06, + "loss": 0.2970271408557892, + "step": 2441 + }, + { + "epoch": 1.2065982948226863, + "grad_norm": 1.2934031608191798, + "learning_rate": 7.450633575991442e-06, + "loss": 0.2628048360347748, + "step": 2442 + }, + { + "epoch": 
1.2070925491165205, + "grad_norm": 1.3935428467061275, + "learning_rate": 7.442733810423526e-06, + "loss": 0.29923003911972046, + "step": 2443 + }, + { + "epoch": 1.2075868034103547, + "grad_norm": 1.2121764987473183, + "learning_rate": 7.4348357520499805e-06, + "loss": 0.2486419975757599, + "step": 2444 + }, + { + "epoch": 1.2080810577041887, + "grad_norm": 1.2651423288599317, + "learning_rate": 7.4269394061434315e-06, + "loss": 0.2711118459701538, + "step": 2445 + }, + { + "epoch": 1.208575311998023, + "grad_norm": 1.2689988235231109, + "learning_rate": 7.419044777975371e-06, + "loss": 0.2568815052509308, + "step": 2446 + }, + { + "epoch": 1.2090695662918571, + "grad_norm": 1.3357220203112758, + "learning_rate": 7.411151872816143e-06, + "loss": 0.2546462416648865, + "step": 2447 + }, + { + "epoch": 1.2095638205856913, + "grad_norm": 1.1716595202066384, + "learning_rate": 7.403260695934933e-06, + "loss": 0.23455393314361572, + "step": 2448 + }, + { + "epoch": 1.2100580748795255, + "grad_norm": 1.3263077198790523, + "learning_rate": 7.395371252599779e-06, + "loss": 0.2874235510826111, + "step": 2449 + }, + { + "epoch": 1.2105523291733598, + "grad_norm": 1.2319732877340805, + "learning_rate": 7.387483548077559e-06, + "loss": 0.2462289184331894, + "step": 2450 + }, + { + "epoch": 1.211046583467194, + "grad_norm": 1.381045021384348, + "learning_rate": 7.379597587633998e-06, + "loss": 0.29385364055633545, + "step": 2451 + }, + { + "epoch": 1.211540837761028, + "grad_norm": 1.1902133906710186, + "learning_rate": 7.371713376533642e-06, + "loss": 0.25049760937690735, + "step": 2452 + }, + { + "epoch": 1.2120350920548622, + "grad_norm": 1.267298470174844, + "learning_rate": 7.363830920039887e-06, + "loss": 0.2748974859714508, + "step": 2453 + }, + { + "epoch": 1.2125293463486964, + "grad_norm": 1.2929931198793703, + "learning_rate": 7.355950223414939e-06, + "loss": 0.2707570791244507, + "step": 2454 + }, + { + "epoch": 1.2130236006425306, + "grad_norm": 1.3328464163268134, 
+ "learning_rate": 7.3480712919198474e-06, + "loss": 0.2864024043083191, + "step": 2455 + }, + { + "epoch": 1.2135178549363648, + "grad_norm": 1.400259353784304, + "learning_rate": 7.340194130814466e-06, + "loss": 0.3181900680065155, + "step": 2456 + }, + { + "epoch": 1.214012109230199, + "grad_norm": 1.2994892273470056, + "learning_rate": 7.332318745357483e-06, + "loss": 0.3022974729537964, + "step": 2457 + }, + { + "epoch": 1.2145063635240332, + "grad_norm": 1.2350650698265369, + "learning_rate": 7.324445140806387e-06, + "loss": 0.2850461006164551, + "step": 2458 + }, + { + "epoch": 1.2150006178178674, + "grad_norm": 1.0534315857750147, + "learning_rate": 7.316573322417483e-06, + "loss": 0.21958643198013306, + "step": 2459 + }, + { + "epoch": 1.2154948721117014, + "grad_norm": 1.3531472648001939, + "learning_rate": 7.3087032954458915e-06, + "loss": 0.2517468333244324, + "step": 2460 + }, + { + "epoch": 1.2159891264055356, + "grad_norm": 1.1714370722498957, + "learning_rate": 7.300835065145526e-06, + "loss": 0.26957637071609497, + "step": 2461 + }, + { + "epoch": 1.2164833806993698, + "grad_norm": 1.2755586367674554, + "learning_rate": 7.292968636769103e-06, + "loss": 0.2699058949947357, + "step": 2462 + }, + { + "epoch": 1.216977634993204, + "grad_norm": 1.2382912705778586, + "learning_rate": 7.285104015568138e-06, + "loss": 0.25076431035995483, + "step": 2463 + }, + { + "epoch": 1.2174718892870382, + "grad_norm": 1.2104527847150177, + "learning_rate": 7.277241206792944e-06, + "loss": 0.24862724542617798, + "step": 2464 + }, + { + "epoch": 1.2179661435808724, + "grad_norm": 1.3107261919810722, + "learning_rate": 7.269380215692614e-06, + "loss": 0.27427712082862854, + "step": 2465 + }, + { + "epoch": 1.2184603978747066, + "grad_norm": 1.2946586839730188, + "learning_rate": 7.261521047515041e-06, + "loss": 0.24343061447143555, + "step": 2466 + }, + { + "epoch": 1.2189546521685406, + "grad_norm": 1.1968860231182823, + "learning_rate": 7.253663707506882e-06, + 
"loss": 0.25482866168022156, + "step": 2467 + }, + { + "epoch": 1.2194489064623748, + "grad_norm": 1.2806570256332481, + "learning_rate": 7.2458082009135964e-06, + "loss": 0.27699458599090576, + "step": 2468 + }, + { + "epoch": 1.219943160756209, + "grad_norm": 1.3000686730507884, + "learning_rate": 7.237954532979401e-06, + "loss": 0.26576149463653564, + "step": 2469 + }, + { + "epoch": 1.2204374150500432, + "grad_norm": 1.2984838025251157, + "learning_rate": 7.230102708947298e-06, + "loss": 0.287861168384552, + "step": 2470 + }, + { + "epoch": 1.2209316693438774, + "grad_norm": 1.2911534198412806, + "learning_rate": 7.2222527340590434e-06, + "loss": 0.25484874844551086, + "step": 2471 + }, + { + "epoch": 1.2214259236377116, + "grad_norm": 1.284847349415858, + "learning_rate": 7.214404613555177e-06, + "loss": 0.26371529698371887, + "step": 2472 + }, + { + "epoch": 1.2219201779315458, + "grad_norm": 1.334957534550205, + "learning_rate": 7.206558352674992e-06, + "loss": 0.23692578077316284, + "step": 2473 + }, + { + "epoch": 1.22241443222538, + "grad_norm": 1.2696744902236006, + "learning_rate": 7.198713956656538e-06, + "loss": 0.26369085907936096, + "step": 2474 + }, + { + "epoch": 1.2229086865192142, + "grad_norm": 1.4374683516439322, + "learning_rate": 7.1908714307366145e-06, + "loss": 0.260580450296402, + "step": 2475 + }, + { + "epoch": 1.2234029408130482, + "grad_norm": 1.280804641850837, + "learning_rate": 7.1830307801507904e-06, + "loss": 0.2693007290363312, + "step": 2476 + }, + { + "epoch": 1.2238971951068824, + "grad_norm": 1.3429546136121409, + "learning_rate": 7.1751920101333695e-06, + "loss": 0.26629775762557983, + "step": 2477 + }, + { + "epoch": 1.2243914494007166, + "grad_norm": 1.3999841706301799, + "learning_rate": 7.167355125917399e-06, + "loss": 0.2963234782218933, + "step": 2478 + }, + { + "epoch": 1.2248857036945509, + "grad_norm": 1.2332551275962955, + "learning_rate": 7.159520132734669e-06, + "loss": 0.24415187537670135, + "step": 2479 + }, + 
{ + "epoch": 1.225379957988385, + "grad_norm": 1.3645078601677985, + "learning_rate": 7.15168703581572e-06, + "loss": 0.2941599190235138, + "step": 2480 + }, + { + "epoch": 1.2258742122822193, + "grad_norm": 1.2551885597461083, + "learning_rate": 7.1438558403898065e-06, + "loss": 0.22807514667510986, + "step": 2481 + }, + { + "epoch": 1.2263684665760535, + "grad_norm": 1.3774209397395383, + "learning_rate": 7.136026551684923e-06, + "loss": 0.28865426778793335, + "step": 2482 + }, + { + "epoch": 1.2268627208698875, + "grad_norm": 1.3250195381886638, + "learning_rate": 7.1281991749277945e-06, + "loss": 0.3015780448913574, + "step": 2483 + }, + { + "epoch": 1.2273569751637217, + "grad_norm": 1.30264219696165, + "learning_rate": 7.12037371534386e-06, + "loss": 0.2521517872810364, + "step": 2484 + }, + { + "epoch": 1.2278512294575559, + "grad_norm": 1.520486974517902, + "learning_rate": 7.1125501781572896e-06, + "loss": 0.2904277443885803, + "step": 2485 + }, + { + "epoch": 1.22834548375139, + "grad_norm": 1.2434155494713983, + "learning_rate": 7.104728568590966e-06, + "loss": 0.26172375679016113, + "step": 2486 + }, + { + "epoch": 1.2288397380452243, + "grad_norm": 1.3588693705399504, + "learning_rate": 7.096908891866483e-06, + "loss": 0.23565448820590973, + "step": 2487 + }, + { + "epoch": 1.2293339923390585, + "grad_norm": 1.276833588621656, + "learning_rate": 7.0890911532041375e-06, + "loss": 0.2550106644630432, + "step": 2488 + }, + { + "epoch": 1.2298282466328927, + "grad_norm": 1.4167484141197517, + "learning_rate": 7.08127535782295e-06, + "loss": 0.3221823573112488, + "step": 2489 + }, + { + "epoch": 1.230322500926727, + "grad_norm": 1.2657124525427264, + "learning_rate": 7.073461510940631e-06, + "loss": 0.26209163665771484, + "step": 2490 + }, + { + "epoch": 1.2308167552205609, + "grad_norm": 1.3626305998908985, + "learning_rate": 7.06564961777359e-06, + "loss": 0.28635868430137634, + "step": 2491 + }, + { + "epoch": 1.231311009514395, + "grad_norm": 
1.417027138446056, + "learning_rate": 7.0578396835369355e-06, + "loss": 0.25630202889442444, + "step": 2492 + }, + { + "epoch": 1.2318052638082293, + "grad_norm": 1.233621488661494, + "learning_rate": 7.050031713444474e-06, + "loss": 0.27345454692840576, + "step": 2493 + }, + { + "epoch": 1.2322995181020635, + "grad_norm": 1.2592068756906736, + "learning_rate": 7.042225712708692e-06, + "loss": 0.2365841269493103, + "step": 2494 + }, + { + "epoch": 1.2327937723958977, + "grad_norm": 1.730933189967813, + "learning_rate": 7.03442168654076e-06, + "loss": 0.2891104221343994, + "step": 2495 + }, + { + "epoch": 1.233288026689732, + "grad_norm": 1.3811266669598459, + "learning_rate": 7.026619640150534e-06, + "loss": 0.2713435888290405, + "step": 2496 + }, + { + "epoch": 1.2337822809835661, + "grad_norm": 1.3509192768016722, + "learning_rate": 7.018819578746557e-06, + "loss": 0.28552842140197754, + "step": 2497 + }, + { + "epoch": 1.2342765352774, + "grad_norm": 1.377186562637688, + "learning_rate": 7.011021507536031e-06, + "loss": 0.2731080949306488, + "step": 2498 + }, + { + "epoch": 1.2347707895712343, + "grad_norm": 1.1800591795719682, + "learning_rate": 7.003225431724841e-06, + "loss": 0.27373206615448, + "step": 2499 + }, + { + "epoch": 1.2352650438650685, + "grad_norm": 1.3197536250384188, + "learning_rate": 6.99543135651753e-06, + "loss": 0.24507245421409607, + "step": 2500 + }, + { + "epoch": 1.2357592981589027, + "grad_norm": 1.2680812543691635, + "learning_rate": 6.9876392871173205e-06, + "loss": 0.2653801739215851, + "step": 2501 + }, + { + "epoch": 1.236253552452737, + "grad_norm": 1.115227060544212, + "learning_rate": 6.979849228726079e-06, + "loss": 0.1929643303155899, + "step": 2502 + }, + { + "epoch": 1.2367478067465711, + "grad_norm": 1.330653204132735, + "learning_rate": 6.972061186544341e-06, + "loss": 0.2684918940067291, + "step": 2503 + }, + { + "epoch": 1.2372420610404053, + "grad_norm": 1.2129572179563677, + "learning_rate": 6.964275165771288e-06, + 
"loss": 0.23158729076385498, + "step": 2504 + }, + { + "epoch": 1.2377363153342396, + "grad_norm": 1.3192284190451669, + "learning_rate": 6.95649117160476e-06, + "loss": 0.24757611751556396, + "step": 2505 + }, + { + "epoch": 1.2382305696280738, + "grad_norm": 1.328208985585749, + "learning_rate": 6.9487092092412425e-06, + "loss": 0.2651844620704651, + "step": 2506 + }, + { + "epoch": 1.2387248239219077, + "grad_norm": 1.3550284074069674, + "learning_rate": 6.940929283875859e-06, + "loss": 0.26745620369911194, + "step": 2507 + }, + { + "epoch": 1.239219078215742, + "grad_norm": 1.2361002758783033, + "learning_rate": 6.933151400702374e-06, + "loss": 0.22088846564292908, + "step": 2508 + }, + { + "epoch": 1.2397133325095762, + "grad_norm": 1.2379679284464757, + "learning_rate": 6.925375564913193e-06, + "loss": 0.2662886381149292, + "step": 2509 + }, + { + "epoch": 1.2402075868034104, + "grad_norm": 1.3634625495618726, + "learning_rate": 6.917601781699357e-06, + "loss": 0.2691834270954132, + "step": 2510 + }, + { + "epoch": 1.2407018410972446, + "grad_norm": 1.1575744185130052, + "learning_rate": 6.909830056250527e-06, + "loss": 0.2110689878463745, + "step": 2511 + }, + { + "epoch": 1.2411960953910788, + "grad_norm": 1.2961548823459923, + "learning_rate": 6.902060393755001e-06, + "loss": 0.29281991720199585, + "step": 2512 + }, + { + "epoch": 1.2416903496849128, + "grad_norm": 1.2724295845366205, + "learning_rate": 6.894292799399688e-06, + "loss": 0.27409040927886963, + "step": 2513 + }, + { + "epoch": 1.242184603978747, + "grad_norm": 1.304980332058365, + "learning_rate": 6.886527278370131e-06, + "loss": 0.29440224170684814, + "step": 2514 + }, + { + "epoch": 1.2426788582725812, + "grad_norm": 1.1224782958445216, + "learning_rate": 6.878763835850475e-06, + "loss": 0.23107948899269104, + "step": 2515 + }, + { + "epoch": 1.2431731125664154, + "grad_norm": 1.55997556893969, + "learning_rate": 6.871002477023488e-06, + "loss": 0.2682652473449707, + "step": 2516 + }, + { + 
"epoch": 1.2436673668602496, + "grad_norm": 1.2329698948831815, + "learning_rate": 6.863243207070534e-06, + "loss": 0.2935982644557953, + "step": 2517 + }, + { + "epoch": 1.2441616211540838, + "grad_norm": 1.4373018605291157, + "learning_rate": 6.855486031171597e-06, + "loss": 0.29027625918388367, + "step": 2518 + }, + { + "epoch": 1.244655875447918, + "grad_norm": 1.2739101669235458, + "learning_rate": 6.84773095450526e-06, + "loss": 0.25107353925704956, + "step": 2519 + }, + { + "epoch": 1.2451501297417522, + "grad_norm": 1.2325888755211254, + "learning_rate": 6.839977982248697e-06, + "loss": 0.279231995344162, + "step": 2520 + }, + { + "epoch": 1.2456443840355864, + "grad_norm": 1.2006221660421637, + "learning_rate": 6.832227119577677e-06, + "loss": 0.2544802129268646, + "step": 2521 + }, + { + "epoch": 1.2461386383294204, + "grad_norm": 1.397981415575177, + "learning_rate": 6.824478371666573e-06, + "loss": 0.24365633726119995, + "step": 2522 + }, + { + "epoch": 1.2466328926232546, + "grad_norm": 1.1393524200353975, + "learning_rate": 6.816731743688336e-06, + "loss": 0.2673290967941284, + "step": 2523 + }, + { + "epoch": 1.2471271469170888, + "grad_norm": 1.284093438519867, + "learning_rate": 6.808987240814504e-06, + "loss": 0.23896455764770508, + "step": 2524 + }, + { + "epoch": 1.247621401210923, + "grad_norm": 1.200000168994301, + "learning_rate": 6.801244868215192e-06, + "loss": 0.23196406662464142, + "step": 2525 + }, + { + "epoch": 1.2481156555047572, + "grad_norm": 1.2289321548733863, + "learning_rate": 6.793504631059106e-06, + "loss": 0.24249708652496338, + "step": 2526 + }, + { + "epoch": 1.2486099097985914, + "grad_norm": 1.1511217069627229, + "learning_rate": 6.785766534513514e-06, + "loss": 0.2366780787706375, + "step": 2527 + }, + { + "epoch": 1.2491041640924256, + "grad_norm": 1.291146988373714, + "learning_rate": 6.778030583744254e-06, + "loss": 0.2615105211734772, + "step": 2528 + }, + { + "epoch": 1.2495984183862596, + "grad_norm": 
1.4688230831159943, + "learning_rate": 6.770296783915738e-06, + "loss": 0.29761314392089844, + "step": 2529 + }, + { + "epoch": 1.2500926726800938, + "grad_norm": 1.2928438568936322, + "learning_rate": 6.762565140190948e-06, + "loss": 0.25020867586135864, + "step": 2530 + }, + { + "epoch": 1.250586926973928, + "grad_norm": 1.3858962507108388, + "learning_rate": 6.754835657731409e-06, + "loss": 0.2716590166091919, + "step": 2531 + }, + { + "epoch": 1.2510811812677622, + "grad_norm": 1.4048062063243787, + "learning_rate": 6.747108341697221e-06, + "loss": 0.27042001485824585, + "step": 2532 + }, + { + "epoch": 1.2515754355615964, + "grad_norm": 1.3297085932201778, + "learning_rate": 6.739383197247023e-06, + "loss": 0.2659035325050354, + "step": 2533 + }, + { + "epoch": 1.2520696898554307, + "grad_norm": 1.3945414928963702, + "learning_rate": 6.731660229538014e-06, + "loss": 0.2803581655025482, + "step": 2534 + }, + { + "epoch": 1.2525639441492649, + "grad_norm": 1.1484885760506975, + "learning_rate": 6.723939443725938e-06, + "loss": 0.24422097206115723, + "step": 2535 + }, + { + "epoch": 1.253058198443099, + "grad_norm": 1.5676789145324774, + "learning_rate": 6.71622084496508e-06, + "loss": 0.30003631114959717, + "step": 2536 + }, + { + "epoch": 1.2535524527369333, + "grad_norm": 1.3207189074013763, + "learning_rate": 6.708504438408265e-06, + "loss": 0.25745317339897156, + "step": 2537 + }, + { + "epoch": 1.2540467070307673, + "grad_norm": 1.3298790802481242, + "learning_rate": 6.700790229206856e-06, + "loss": 0.27648618817329407, + "step": 2538 + }, + { + "epoch": 1.2545409613246015, + "grad_norm": 1.2910375745243117, + "learning_rate": 6.6930782225107536e-06, + "loss": 0.2579975724220276, + "step": 2539 + }, + { + "epoch": 1.2550352156184357, + "grad_norm": 1.3321333943034437, + "learning_rate": 6.68536842346838e-06, + "loss": 0.2806825637817383, + "step": 2540 + }, + { + "epoch": 1.2555294699122699, + "grad_norm": 1.5211080365897773, + "learning_rate": 
6.677660837226685e-06, + "loss": 0.2641657888889313, + "step": 2541 + }, + { + "epoch": 1.256023724206104, + "grad_norm": 1.3170844434659201, + "learning_rate": 6.669955468931142e-06, + "loss": 0.25483542680740356, + "step": 2542 + }, + { + "epoch": 1.2565179784999383, + "grad_norm": 1.438596032878092, + "learning_rate": 6.662252323725751e-06, + "loss": 0.264334112405777, + "step": 2543 + }, + { + "epoch": 1.2570122327937723, + "grad_norm": 1.2825942587632855, + "learning_rate": 6.654551406753017e-06, + "loss": 0.2541567385196686, + "step": 2544 + }, + { + "epoch": 1.2575064870876065, + "grad_norm": 1.3007868833040497, + "learning_rate": 6.646852723153965e-06, + "loss": 0.2695424258708954, + "step": 2545 + }, + { + "epoch": 1.2580007413814407, + "grad_norm": 1.2114763710946868, + "learning_rate": 6.63915627806812e-06, + "loss": 0.2694344222545624, + "step": 2546 + }, + { + "epoch": 1.258494995675275, + "grad_norm": 1.3203626104751756, + "learning_rate": 6.631462076633527e-06, + "loss": 0.2695961892604828, + "step": 2547 + }, + { + "epoch": 1.258989249969109, + "grad_norm": 1.43655166025842, + "learning_rate": 6.623770123986719e-06, + "loss": 0.26878753304481506, + "step": 2548 + }, + { + "epoch": 1.2594835042629433, + "grad_norm": 1.4117532208090406, + "learning_rate": 6.616080425262738e-06, + "loss": 0.27568501234054565, + "step": 2549 + }, + { + "epoch": 1.2599777585567775, + "grad_norm": 1.4407785281346286, + "learning_rate": 6.608392985595111e-06, + "loss": 0.2991989254951477, + "step": 2550 + }, + { + "epoch": 1.2604720128506117, + "grad_norm": 1.2938769852574108, + "learning_rate": 6.600707810115869e-06, + "loss": 0.21832239627838135, + "step": 2551 + }, + { + "epoch": 1.260966267144446, + "grad_norm": 1.3528768023288296, + "learning_rate": 6.593024903955525e-06, + "loss": 0.2671685516834259, + "step": 2552 + }, + { + "epoch": 1.26146052143828, + "grad_norm": 1.142061359022944, + "learning_rate": 6.585344272243073e-06, + "loss": 0.23399557173252106, + "step": 
2553 + }, + { + "epoch": 1.2619547757321141, + "grad_norm": 1.3000899404630435, + "learning_rate": 6.577665920105996e-06, + "loss": 0.2701990008354187, + "step": 2554 + }, + { + "epoch": 1.2624490300259483, + "grad_norm": 1.216581780326655, + "learning_rate": 6.56998985267025e-06, + "loss": 0.2679189145565033, + "step": 2555 + }, + { + "epoch": 1.2629432843197825, + "grad_norm": 1.3457541131318878, + "learning_rate": 6.562316075060272e-06, + "loss": 0.2597065567970276, + "step": 2556 + }, + { + "epoch": 1.2634375386136167, + "grad_norm": 1.3732680167208262, + "learning_rate": 6.554644592398962e-06, + "loss": 0.2942010462284088, + "step": 2557 + }, + { + "epoch": 1.263931792907451, + "grad_norm": 1.2654921757837638, + "learning_rate": 6.546975409807696e-06, + "loss": 0.2547098994255066, + "step": 2558 + }, + { + "epoch": 1.264426047201285, + "grad_norm": 1.29416806058113, + "learning_rate": 6.539308532406306e-06, + "loss": 0.2779114246368408, + "step": 2559 + }, + { + "epoch": 1.2649203014951191, + "grad_norm": 1.2525651200835928, + "learning_rate": 6.531643965313093e-06, + "loss": 0.22318917512893677, + "step": 2560 + }, + { + "epoch": 1.2654145557889533, + "grad_norm": 1.2931765026229116, + "learning_rate": 6.523981713644814e-06, + "loss": 0.25439128279685974, + "step": 2561 + }, + { + "epoch": 1.2659088100827876, + "grad_norm": 1.1946536852540512, + "learning_rate": 6.516321782516677e-06, + "loss": 0.2317974865436554, + "step": 2562 + }, + { + "epoch": 1.2664030643766218, + "grad_norm": 1.3517228291780166, + "learning_rate": 6.508664177042339e-06, + "loss": 0.273223876953125, + "step": 2563 + }, + { + "epoch": 1.266897318670456, + "grad_norm": 1.3767500694886763, + "learning_rate": 6.501008902333912e-06, + "loss": 0.28408509492874146, + "step": 2564 + }, + { + "epoch": 1.2673915729642902, + "grad_norm": 1.4378995512233899, + "learning_rate": 6.493355963501951e-06, + "loss": 0.2702238857746124, + "step": 2565 + }, + { + "epoch": 1.2678858272581244, + "grad_norm": 
1.2819637354130675, + "learning_rate": 6.485705365655441e-06, + "loss": 0.2142164558172226, + "step": 2566 + }, + { + "epoch": 1.2683800815519586, + "grad_norm": 1.4108385899794438, + "learning_rate": 6.478057113901817e-06, + "loss": 0.2654300928115845, + "step": 2567 + }, + { + "epoch": 1.2688743358457928, + "grad_norm": 1.1724627648861543, + "learning_rate": 6.470411213346941e-06, + "loss": 0.24601367115974426, + "step": 2568 + }, + { + "epoch": 1.2693685901396268, + "grad_norm": 1.36613316910106, + "learning_rate": 6.462767669095109e-06, + "loss": 0.26201942563056946, + "step": 2569 + }, + { + "epoch": 1.269862844433461, + "grad_norm": 1.342399065083916, + "learning_rate": 6.455126486249038e-06, + "loss": 0.2839587926864624, + "step": 2570 + }, + { + "epoch": 1.2703570987272952, + "grad_norm": 1.2538564056049797, + "learning_rate": 6.447487669909873e-06, + "loss": 0.21100708842277527, + "step": 2571 + }, + { + "epoch": 1.2708513530211294, + "grad_norm": 1.1457223195177177, + "learning_rate": 6.439851225177185e-06, + "loss": 0.2181582748889923, + "step": 2572 + }, + { + "epoch": 1.2713456073149636, + "grad_norm": 1.397761306307691, + "learning_rate": 6.432217157148948e-06, + "loss": 0.29196488857269287, + "step": 2573 + }, + { + "epoch": 1.2718398616087978, + "grad_norm": 1.3664440708479575, + "learning_rate": 6.424585470921563e-06, + "loss": 0.2365931123495102, + "step": 2574 + }, + { + "epoch": 1.2723341159026318, + "grad_norm": 1.3496940412150429, + "learning_rate": 6.4169561715898255e-06, + "loss": 0.2277393937110901, + "step": 2575 + }, + { + "epoch": 1.272828370196466, + "grad_norm": 1.3624051718280268, + "learning_rate": 6.409329264246956e-06, + "loss": 0.25285032391548157, + "step": 2576 + }, + { + "epoch": 1.2733226244903002, + "grad_norm": 1.2632390853508073, + "learning_rate": 6.401704753984563e-06, + "loss": 0.253650963306427, + "step": 2577 + }, + { + "epoch": 1.2738168787841344, + "grad_norm": 1.264245223392645, + "learning_rate": 
6.394082645892668e-06, + "loss": 0.22143784165382385, + "step": 2578 + }, + { + "epoch": 1.2743111330779686, + "grad_norm": 1.3283739907286298, + "learning_rate": 6.3864629450596696e-06, + "loss": 0.27591395378112793, + "step": 2579 + }, + { + "epoch": 1.2748053873718028, + "grad_norm": 1.6236594986793635, + "learning_rate": 6.37884565657238e-06, + "loss": 0.32865333557128906, + "step": 2580 + }, + { + "epoch": 1.275299641665637, + "grad_norm": 1.2172019661301716, + "learning_rate": 6.371230785515992e-06, + "loss": 0.2743702530860901, + "step": 2581 + }, + { + "epoch": 1.2757938959594712, + "grad_norm": 1.2586352823219396, + "learning_rate": 6.3636183369740845e-06, + "loss": 0.23967956006526947, + "step": 2582 + }, + { + "epoch": 1.2762881502533054, + "grad_norm": 1.206746025741565, + "learning_rate": 6.356008316028614e-06, + "loss": 0.2474803626537323, + "step": 2583 + }, + { + "epoch": 1.2767824045471394, + "grad_norm": 1.2591134604976273, + "learning_rate": 6.348400727759925e-06, + "loss": 0.2523267865180969, + "step": 2584 + }, + { + "epoch": 1.2772766588409736, + "grad_norm": 1.3690385191668641, + "learning_rate": 6.340795577246738e-06, + "loss": 0.2549436092376709, + "step": 2585 + }, + { + "epoch": 1.2777709131348078, + "grad_norm": 1.309885921175695, + "learning_rate": 6.333192869566138e-06, + "loss": 0.2602443993091583, + "step": 2586 + }, + { + "epoch": 1.278265167428642, + "grad_norm": 1.248955873440961, + "learning_rate": 6.325592609793588e-06, + "loss": 0.22912462055683136, + "step": 2587 + }, + { + "epoch": 1.2787594217224763, + "grad_norm": 1.3253843576578603, + "learning_rate": 6.317994803002907e-06, + "loss": 0.3004158139228821, + "step": 2588 + }, + { + "epoch": 1.2792536760163105, + "grad_norm": 1.2054603629919527, + "learning_rate": 6.310399454266289e-06, + "loss": 0.25851407647132874, + "step": 2589 + }, + { + "epoch": 1.2797479303101444, + "grad_norm": 1.2857681683589963, + "learning_rate": 6.302806568654277e-06, + "loss": 0.24637526273727417, 
+ "step": 2590 + }, + { + "epoch": 1.2802421846039787, + "grad_norm": 1.2976312908550238, + "learning_rate": 6.295216151235774e-06, + "loss": 0.26500213146209717, + "step": 2591 + }, + { + "epoch": 1.2807364388978129, + "grad_norm": 1.2103490895138174, + "learning_rate": 6.287628207078031e-06, + "loss": 0.24276241660118103, + "step": 2592 + }, + { + "epoch": 1.281230693191647, + "grad_norm": 2.3839558822188787, + "learning_rate": 6.280042741246655e-06, + "loss": 0.27117204666137695, + "step": 2593 + }, + { + "epoch": 1.2817249474854813, + "grad_norm": 1.4461368742366545, + "learning_rate": 6.272459758805596e-06, + "loss": 0.29287856817245483, + "step": 2594 + }, + { + "epoch": 1.2822192017793155, + "grad_norm": 1.4301387064569637, + "learning_rate": 6.26487926481714e-06, + "loss": 0.3065788149833679, + "step": 2595 + }, + { + "epoch": 1.2827134560731497, + "grad_norm": 1.3198078410588965, + "learning_rate": 6.257301264341915e-06, + "loss": 0.2738455533981323, + "step": 2596 + }, + { + "epoch": 1.283207710366984, + "grad_norm": 1.5398007848288653, + "learning_rate": 6.2497257624388915e-06, + "loss": 0.24216318130493164, + "step": 2597 + }, + { + "epoch": 1.283701964660818, + "grad_norm": 1.2565420891983292, + "learning_rate": 6.242152764165368e-06, + "loss": 0.276785671710968, + "step": 2598 + }, + { + "epoch": 1.2841962189546523, + "grad_norm": 1.2307015932000853, + "learning_rate": 6.234582274576961e-06, + "loss": 0.24999365210533142, + "step": 2599 + }, + { + "epoch": 1.2846904732484863, + "grad_norm": 1.2824145770644522, + "learning_rate": 6.227014298727627e-06, + "loss": 0.27714112401008606, + "step": 2600 + }, + { + "epoch": 1.2851847275423205, + "grad_norm": 1.2260344372038856, + "learning_rate": 6.219448841669639e-06, + "loss": 0.2422318160533905, + "step": 2601 + }, + { + "epoch": 1.2856789818361547, + "grad_norm": 1.3255802725159413, + "learning_rate": 6.21188590845359e-06, + "loss": 0.26688697934150696, + "step": 2602 + }, + { + "epoch": 
1.286173236129989, + "grad_norm": 1.2753676961687272, + "learning_rate": 6.204325504128379e-06, + "loss": 0.256889671087265, + "step": 2603 + }, + { + "epoch": 1.2866674904238231, + "grad_norm": 1.3013140965176258, + "learning_rate": 6.196767633741225e-06, + "loss": 0.27372461557388306, + "step": 2604 + }, + { + "epoch": 1.287161744717657, + "grad_norm": 1.3064762941978003, + "learning_rate": 6.189212302337663e-06, + "loss": 0.25194403529167175, + "step": 2605 + }, + { + "epoch": 1.2876559990114913, + "grad_norm": 1.2533511197404907, + "learning_rate": 6.181659514961515e-06, + "loss": 0.24381688237190247, + "step": 2606 + }, + { + "epoch": 1.2881502533053255, + "grad_norm": 1.2987400887924563, + "learning_rate": 6.17410927665492e-06, + "loss": 0.255805104970932, + "step": 2607 + }, + { + "epoch": 1.2886445075991597, + "grad_norm": 1.270289405479379, + "learning_rate": 6.166561592458307e-06, + "loss": 0.25070682168006897, + "step": 2608 + }, + { + "epoch": 1.289138761892994, + "grad_norm": 1.1954868388063873, + "learning_rate": 6.159016467410397e-06, + "loss": 0.24080060422420502, + "step": 2609 + }, + { + "epoch": 1.2896330161868281, + "grad_norm": 1.3524298235557053, + "learning_rate": 6.151473906548215e-06, + "loss": 0.28041762113571167, + "step": 2610 + }, + { + "epoch": 1.2901272704806623, + "grad_norm": 1.3891353799265191, + "learning_rate": 6.143933914907065e-06, + "loss": 0.2624273896217346, + "step": 2611 + }, + { + "epoch": 1.2906215247744965, + "grad_norm": 1.3838932352032651, + "learning_rate": 6.136396497520536e-06, + "loss": 0.2658112049102783, + "step": 2612 + }, + { + "epoch": 1.2911157790683307, + "grad_norm": 1.3103712430992434, + "learning_rate": 6.1288616594205e-06, + "loss": 0.27714237570762634, + "step": 2613 + }, + { + "epoch": 1.291610033362165, + "grad_norm": 1.2276105048536776, + "learning_rate": 6.121329405637111e-06, + "loss": 0.23253153264522552, + "step": 2614 + }, + { + "epoch": 1.292104287655999, + "grad_norm": 1.2168125400378236, + 
"learning_rate": 6.1137997411987915e-06, + "loss": 0.2438409924507141, + "step": 2615 + }, + { + "epoch": 1.2925985419498331, + "grad_norm": 1.3814066274151728, + "learning_rate": 6.106272671132236e-06, + "loss": 0.24013856053352356, + "step": 2616 + }, + { + "epoch": 1.2930927962436674, + "grad_norm": 1.4362282063831207, + "learning_rate": 6.098748200462408e-06, + "loss": 0.2850446403026581, + "step": 2617 + }, + { + "epoch": 1.2935870505375016, + "grad_norm": 1.3403873033762816, + "learning_rate": 6.0912263342125445e-06, + "loss": 0.22195187211036682, + "step": 2618 + }, + { + "epoch": 1.2940813048313358, + "grad_norm": 1.3701004376420556, + "learning_rate": 6.083707077404129e-06, + "loss": 0.29266390204429626, + "step": 2619 + }, + { + "epoch": 1.29457555912517, + "grad_norm": 1.2103981171479565, + "learning_rate": 6.076190435056913e-06, + "loss": 0.26741352677345276, + "step": 2620 + }, + { + "epoch": 1.295069813419004, + "grad_norm": 1.259544042020202, + "learning_rate": 6.068676412188892e-06, + "loss": 0.26014602184295654, + "step": 2621 + }, + { + "epoch": 1.2955640677128382, + "grad_norm": 1.2871395012144142, + "learning_rate": 6.061165013816333e-06, + "loss": 0.2561393976211548, + "step": 2622 + }, + { + "epoch": 1.2960583220066724, + "grad_norm": 1.312678751233067, + "learning_rate": 6.053656244953728e-06, + "loss": 0.2952851951122284, + "step": 2623 + }, + { + "epoch": 1.2965525763005066, + "grad_norm": 1.2817239432203538, + "learning_rate": 6.046150110613831e-06, + "loss": 0.2830423414707184, + "step": 2624 + }, + { + "epoch": 1.2970468305943408, + "grad_norm": 1.2514529269380406, + "learning_rate": 6.038646615807622e-06, + "loss": 0.22306497395038605, + "step": 2625 + }, + { + "epoch": 1.297541084888175, + "grad_norm": 1.3018072981213034, + "learning_rate": 6.031145765544333e-06, + "loss": 0.23291784524917603, + "step": 2626 + }, + { + "epoch": 1.2980353391820092, + "grad_norm": 1.3763927806121403, + "learning_rate": 6.023647564831425e-06, + "loss": 
0.2376563400030136, + "step": 2627 + }, + { + "epoch": 1.2985295934758434, + "grad_norm": 1.3283544756021872, + "learning_rate": 6.016152018674588e-06, + "loss": 0.2873516380786896, + "step": 2628 + }, + { + "epoch": 1.2990238477696776, + "grad_norm": 1.2475849952661122, + "learning_rate": 6.00865913207774e-06, + "loss": 0.2416999638080597, + "step": 2629 + }, + { + "epoch": 1.2995181020635118, + "grad_norm": 1.2254304075146119, + "learning_rate": 6.001168910043023e-06, + "loss": 0.2627726197242737, + "step": 2630 + }, + { + "epoch": 1.3000123563573458, + "grad_norm": 1.4025542210635493, + "learning_rate": 5.993681357570809e-06, + "loss": 0.25375279784202576, + "step": 2631 + }, + { + "epoch": 1.30050661065118, + "grad_norm": 1.3348797401747288, + "learning_rate": 5.986196479659676e-06, + "loss": 0.2853030562400818, + "step": 2632 + }, + { + "epoch": 1.3010008649450142, + "grad_norm": 1.3089867713489467, + "learning_rate": 5.978714281306425e-06, + "loss": 0.2626519501209259, + "step": 2633 + }, + { + "epoch": 1.3014951192388484, + "grad_norm": 1.4566011034207051, + "learning_rate": 5.971234767506057e-06, + "loss": 0.2895713448524475, + "step": 2634 + }, + { + "epoch": 1.3019893735326826, + "grad_norm": 1.2504104998957544, + "learning_rate": 5.9637579432518e-06, + "loss": 0.24617832899093628, + "step": 2635 + }, + { + "epoch": 1.3024836278265166, + "grad_norm": 1.2199824881911456, + "learning_rate": 5.956283813535066e-06, + "loss": 0.25497785210609436, + "step": 2636 + }, + { + "epoch": 1.3029778821203508, + "grad_norm": 1.3200409304272294, + "learning_rate": 5.948812383345484e-06, + "loss": 0.25832462310791016, + "step": 2637 + }, + { + "epoch": 1.303472136414185, + "grad_norm": 1.214232538768618, + "learning_rate": 5.941343657670866e-06, + "loss": 0.24273909628391266, + "step": 2638 + }, + { + "epoch": 1.3039663907080192, + "grad_norm": 1.2844572342866962, + "learning_rate": 5.933877641497232e-06, + "loss": 0.2668009400367737, + "step": 2639 + }, + { + "epoch": 
1.3044606450018534, + "grad_norm": 1.2388896928667246, + "learning_rate": 5.92641433980879e-06, + "loss": 0.2519373595714569, + "step": 2640 + }, + { + "epoch": 1.3049548992956876, + "grad_norm": 1.3760811135868023, + "learning_rate": 5.918953757587928e-06, + "loss": 0.30091768503189087, + "step": 2641 + }, + { + "epoch": 1.3054491535895218, + "grad_norm": 1.207587317973019, + "learning_rate": 5.911495899815225e-06, + "loss": 0.2504241466522217, + "step": 2642 + }, + { + "epoch": 1.305943407883356, + "grad_norm": 1.1902656490822856, + "learning_rate": 5.904040771469444e-06, + "loss": 0.24741190671920776, + "step": 2643 + }, + { + "epoch": 1.3064376621771903, + "grad_norm": 1.2559463008488698, + "learning_rate": 5.896588377527519e-06, + "loss": 0.2636350691318512, + "step": 2644 + }, + { + "epoch": 1.3069319164710245, + "grad_norm": 1.1981836589630794, + "learning_rate": 5.889138722964563e-06, + "loss": 0.22512421011924744, + "step": 2645 + }, + { + "epoch": 1.3074261707648585, + "grad_norm": 1.2451009493990417, + "learning_rate": 5.8816918127538546e-06, + "loss": 0.26447975635528564, + "step": 2646 + }, + { + "epoch": 1.3079204250586927, + "grad_norm": 1.1839899877527418, + "learning_rate": 5.874247651866853e-06, + "loss": 0.22084996104240417, + "step": 2647 + }, + { + "epoch": 1.3084146793525269, + "grad_norm": 1.2774730254159221, + "learning_rate": 5.8668062452731715e-06, + "loss": 0.24033552408218384, + "step": 2648 + }, + { + "epoch": 1.308908933646361, + "grad_norm": 1.305578072115893, + "learning_rate": 5.8593675979405795e-06, + "loss": 0.24829509854316711, + "step": 2649 + }, + { + "epoch": 1.3094031879401953, + "grad_norm": 1.3651670637998603, + "learning_rate": 5.851931714835016e-06, + "loss": 0.29011303186416626, + "step": 2650 + }, + { + "epoch": 1.3098974422340295, + "grad_norm": 1.3474001783390817, + "learning_rate": 5.8444986009205754e-06, + "loss": 0.273196280002594, + "step": 2651 + }, + { + "epoch": 1.3103916965278635, + "grad_norm": 
1.4905929158728624, + "learning_rate": 5.837068261159491e-06, + "loss": 0.28843480348587036, + "step": 2652 + }, + { + "epoch": 1.3108859508216977, + "grad_norm": 1.297768951304802, + "learning_rate": 5.829640700512159e-06, + "loss": 0.25919461250305176, + "step": 2653 + }, + { + "epoch": 1.3113802051155319, + "grad_norm": 1.4662917967499176, + "learning_rate": 5.822215923937105e-06, + "loss": 0.24588480591773987, + "step": 2654 + }, + { + "epoch": 1.311874459409366, + "grad_norm": 1.4533199098003418, + "learning_rate": 5.814793936391001e-06, + "loss": 0.26138943433761597, + "step": 2655 + }, + { + "epoch": 1.3123687137032003, + "grad_norm": 1.3892499103405112, + "learning_rate": 5.807374742828675e-06, + "loss": 0.2740943729877472, + "step": 2656 + }, + { + "epoch": 1.3128629679970345, + "grad_norm": 1.2689667055719156, + "learning_rate": 5.7999583482030605e-06, + "loss": 0.2307349294424057, + "step": 2657 + }, + { + "epoch": 1.3133572222908687, + "grad_norm": 1.4143938245126972, + "learning_rate": 5.792544757465242e-06, + "loss": 0.28424161672592163, + "step": 2658 + }, + { + "epoch": 1.313851476584703, + "grad_norm": 1.1640800807114133, + "learning_rate": 5.785133975564426e-06, + "loss": 0.2586106061935425, + "step": 2659 + }, + { + "epoch": 1.3143457308785371, + "grad_norm": 1.3677717868907802, + "learning_rate": 5.7777260074479455e-06, + "loss": 0.23268333077430725, + "step": 2660 + }, + { + "epoch": 1.314839985172371, + "grad_norm": 1.3455357811423037, + "learning_rate": 5.770320858061254e-06, + "loss": 0.22144779562950134, + "step": 2661 + }, + { + "epoch": 1.3153342394662053, + "grad_norm": 1.1539678539958322, + "learning_rate": 5.762918532347925e-06, + "loss": 0.2450334131717682, + "step": 2662 + }, + { + "epoch": 1.3158284937600395, + "grad_norm": 1.3018328480210146, + "learning_rate": 5.7555190352496375e-06, + "loss": 0.24483400583267212, + "step": 2663 + }, + { + "epoch": 1.3163227480538737, + "grad_norm": 1.484825672376601, + "learning_rate": 
5.748122371706198e-06, + "loss": 0.2590720057487488, + "step": 2664 + }, + { + "epoch": 1.316817002347708, + "grad_norm": 1.5800261617865896, + "learning_rate": 5.740728546655515e-06, + "loss": 0.27116847038269043, + "step": 2665 + }, + { + "epoch": 1.3173112566415421, + "grad_norm": 1.3133697295364004, + "learning_rate": 5.733337565033595e-06, + "loss": 0.2720273435115814, + "step": 2666 + }, + { + "epoch": 1.3178055109353761, + "grad_norm": 1.223977156924706, + "learning_rate": 5.7259494317745514e-06, + "loss": 0.22150173783302307, + "step": 2667 + }, + { + "epoch": 1.3182997652292103, + "grad_norm": 1.2771960781536442, + "learning_rate": 5.718564151810597e-06, + "loss": 0.27474984526634216, + "step": 2668 + }, + { + "epoch": 1.3187940195230445, + "grad_norm": 1.3886425317966573, + "learning_rate": 5.711181730072044e-06, + "loss": 0.2547265291213989, + "step": 2669 + }, + { + "epoch": 1.3192882738168787, + "grad_norm": 1.3822455459704068, + "learning_rate": 5.703802171487286e-06, + "loss": 0.2686036229133606, + "step": 2670 + }, + { + "epoch": 1.319782528110713, + "grad_norm": 1.2588694556349689, + "learning_rate": 5.696425480982814e-06, + "loss": 0.2276458591222763, + "step": 2671 + }, + { + "epoch": 1.3202767824045472, + "grad_norm": 1.3366284026803796, + "learning_rate": 5.6890516634832e-06, + "loss": 0.25005075335502625, + "step": 2672 + }, + { + "epoch": 1.3207710366983814, + "grad_norm": 1.4092724528348008, + "learning_rate": 5.681680723911104e-06, + "loss": 0.25919869542121887, + "step": 2673 + }, + { + "epoch": 1.3212652909922156, + "grad_norm": 1.3254224112633677, + "learning_rate": 5.6743126671872505e-06, + "loss": 0.2684757709503174, + "step": 2674 + }, + { + "epoch": 1.3217595452860498, + "grad_norm": 1.2529305606098464, + "learning_rate": 5.666947498230451e-06, + "loss": 0.2554991543292999, + "step": 2675 + }, + { + "epoch": 1.322253799579884, + "grad_norm": 1.3734571061597927, + "learning_rate": 5.6595852219575975e-06, + "loss": 0.27026665210723877, 
+ "step": 2676 + }, + { + "epoch": 1.322748053873718, + "grad_norm": 1.5029882994051502, + "learning_rate": 5.652225843283629e-06, + "loss": 0.3248092234134674, + "step": 2677 + }, + { + "epoch": 1.3232423081675522, + "grad_norm": 1.3299866785479277, + "learning_rate": 5.644869367121564e-06, + "loss": 0.2554503083229065, + "step": 2678 + }, + { + "epoch": 1.3237365624613864, + "grad_norm": 1.4099986774485116, + "learning_rate": 5.637515798382488e-06, + "loss": 0.25482693314552307, + "step": 2679 + }, + { + "epoch": 1.3242308167552206, + "grad_norm": 1.268292092612611, + "learning_rate": 5.630165141975523e-06, + "loss": 0.24664446711540222, + "step": 2680 + }, + { + "epoch": 1.3247250710490548, + "grad_norm": 1.3674712589344702, + "learning_rate": 5.622817402807879e-06, + "loss": 0.23855865001678467, + "step": 2681 + }, + { + "epoch": 1.325219325342889, + "grad_norm": 1.28659959156705, + "learning_rate": 5.615472585784796e-06, + "loss": 0.2847699820995331, + "step": 2682 + }, + { + "epoch": 1.325713579636723, + "grad_norm": 1.3902791844570088, + "learning_rate": 5.608130695809564e-06, + "loss": 0.2705647051334381, + "step": 2683 + }, + { + "epoch": 1.3262078339305572, + "grad_norm": 1.3726972299660716, + "learning_rate": 5.600791737783523e-06, + "loss": 0.30135318636894226, + "step": 2684 + }, + { + "epoch": 1.3267020882243914, + "grad_norm": 1.3006770767718296, + "learning_rate": 5.593455716606069e-06, + "loss": 0.261536180973053, + "step": 2685 + }, + { + "epoch": 1.3271963425182256, + "grad_norm": 1.2249107195075626, + "learning_rate": 5.586122637174614e-06, + "loss": 0.24006187915802002, + "step": 2686 + }, + { + "epoch": 1.3276905968120598, + "grad_norm": 1.2887498899635654, + "learning_rate": 5.578792504384618e-06, + "loss": 0.27928346395492554, + "step": 2687 + }, + { + "epoch": 1.328184851105894, + "grad_norm": 1.1715759673643904, + "learning_rate": 5.5714653231295745e-06, + "loss": 0.24134980142116547, + "step": 2688 + }, + { + "epoch": 1.3286791053997282, 
+ "grad_norm": 1.2633540397916776, + "learning_rate": 5.5641410983010055e-06, + "loss": 0.27914801239967346, + "step": 2689 + }, + { + "epoch": 1.3291733596935624, + "grad_norm": 1.6348254119913803, + "learning_rate": 5.55681983478846e-06, + "loss": 0.2735476493835449, + "step": 2690 + }, + { + "epoch": 1.3296676139873966, + "grad_norm": 1.3384777828423575, + "learning_rate": 5.549501537479511e-06, + "loss": 0.24919739365577698, + "step": 2691 + }, + { + "epoch": 1.3301618682812306, + "grad_norm": 1.430948519009228, + "learning_rate": 5.542186211259737e-06, + "loss": 0.25435787439346313, + "step": 2692 + }, + { + "epoch": 1.3306561225750648, + "grad_norm": 1.2533415908145504, + "learning_rate": 5.534873861012763e-06, + "loss": 0.2502862811088562, + "step": 2693 + }, + { + "epoch": 1.331150376868899, + "grad_norm": 1.5771700033159861, + "learning_rate": 5.527564491620195e-06, + "loss": 0.25752580165863037, + "step": 2694 + }, + { + "epoch": 1.3316446311627332, + "grad_norm": 1.332305251527839, + "learning_rate": 5.520258107961671e-06, + "loss": 0.22301846742630005, + "step": 2695 + }, + { + "epoch": 1.3321388854565674, + "grad_norm": 1.4890781870784164, + "learning_rate": 5.512954714914825e-06, + "loss": 0.24581964313983917, + "step": 2696 + }, + { + "epoch": 1.3326331397504017, + "grad_norm": 1.3113609641171107, + "learning_rate": 5.5056543173553e-06, + "loss": 0.271970272064209, + "step": 2697 + }, + { + "epoch": 1.3331273940442356, + "grad_norm": 1.2432947451070444, + "learning_rate": 5.498356920156735e-06, + "loss": 0.23041053116321564, + "step": 2698 + }, + { + "epoch": 1.3336216483380698, + "grad_norm": 1.3239879393507852, + "learning_rate": 5.491062528190775e-06, + "loss": 0.2338491678237915, + "step": 2699 + }, + { + "epoch": 1.334115902631904, + "grad_norm": 1.3971989589857847, + "learning_rate": 5.483771146327037e-06, + "loss": 0.2667239010334015, + "step": 2700 + }, + { + "epoch": 1.3346101569257383, + "grad_norm": 1.1737606299055239, + "learning_rate": 
5.4764827794331586e-06, + "loss": 0.24761441349983215, + "step": 2701 + }, + { + "epoch": 1.3351044112195725, + "grad_norm": 1.2384835240862428, + "learning_rate": 5.469197432374747e-06, + "loss": 0.24087639153003693, + "step": 2702 + }, + { + "epoch": 1.3355986655134067, + "grad_norm": 1.3287799587341789, + "learning_rate": 5.461915110015386e-06, + "loss": 0.26774898171424866, + "step": 2703 + }, + { + "epoch": 1.3360929198072409, + "grad_norm": 1.432719946516567, + "learning_rate": 5.454635817216658e-06, + "loss": 0.2820417284965515, + "step": 2704 + }, + { + "epoch": 1.336587174101075, + "grad_norm": 1.408646831955897, + "learning_rate": 5.447359558838113e-06, + "loss": 0.2891086935997009, + "step": 2705 + }, + { + "epoch": 1.3370814283949093, + "grad_norm": 1.370327694474157, + "learning_rate": 5.440086339737277e-06, + "loss": 0.24551361799240112, + "step": 2706 + }, + { + "epoch": 1.3375756826887435, + "grad_norm": 1.3889596017030068, + "learning_rate": 5.432816164769648e-06, + "loss": 0.2293522208929062, + "step": 2707 + }, + { + "epoch": 1.3380699369825775, + "grad_norm": 1.255610549812546, + "learning_rate": 5.425549038788693e-06, + "loss": 0.22325105965137482, + "step": 2708 + }, + { + "epoch": 1.3385641912764117, + "grad_norm": 1.3152207031427636, + "learning_rate": 5.4182849666458315e-06, + "loss": 0.2263861447572708, + "step": 2709 + }, + { + "epoch": 1.339058445570246, + "grad_norm": 1.2663328789435477, + "learning_rate": 5.411023953190466e-06, + "loss": 0.26902303099632263, + "step": 2710 + }, + { + "epoch": 1.33955269986408, + "grad_norm": 1.4136099878472004, + "learning_rate": 5.403766003269944e-06, + "loss": 0.26154825091362, + "step": 2711 + }, + { + "epoch": 1.3400469541579143, + "grad_norm": 1.32960722740892, + "learning_rate": 5.396511121729562e-06, + "loss": 0.2878270745277405, + "step": 2712 + }, + { + "epoch": 1.3405412084517483, + "grad_norm": 1.3136699200223048, + "learning_rate": 5.389259313412581e-06, + "loss": 0.26206687092781067, + 
"step": 2713 + }, + { + "epoch": 1.3410354627455825, + "grad_norm": 1.4998302342686003, + "learning_rate": 5.382010583160201e-06, + "loss": 0.25612518191337585, + "step": 2714 + }, + { + "epoch": 1.3415297170394167, + "grad_norm": 1.2688327982594605, + "learning_rate": 5.374764935811574e-06, + "loss": 0.25600868463516235, + "step": 2715 + }, + { + "epoch": 1.342023971333251, + "grad_norm": 1.274882827976935, + "learning_rate": 5.367522376203787e-06, + "loss": 0.24837616086006165, + "step": 2716 + }, + { + "epoch": 1.3425182256270851, + "grad_norm": 1.2814047275641038, + "learning_rate": 5.360282909171875e-06, + "loss": 0.23487885296344757, + "step": 2717 + }, + { + "epoch": 1.3430124799209193, + "grad_norm": 1.2024219184737237, + "learning_rate": 5.353046539548797e-06, + "loss": 0.22786842286586761, + "step": 2718 + }, + { + "epoch": 1.3435067342147535, + "grad_norm": 1.288373437821988, + "learning_rate": 5.3458132721654564e-06, + "loss": 0.2198137640953064, + "step": 2719 + }, + { + "epoch": 1.3440009885085877, + "grad_norm": 1.157338464361865, + "learning_rate": 5.338583111850671e-06, + "loss": 0.20056495070457458, + "step": 2720 + }, + { + "epoch": 1.344495242802422, + "grad_norm": 1.2341328448147324, + "learning_rate": 5.331356063431195e-06, + "loss": 0.21636295318603516, + "step": 2721 + }, + { + "epoch": 1.3449894970962561, + "grad_norm": 1.2390666617057948, + "learning_rate": 5.32413213173171e-06, + "loss": 0.23933230340480804, + "step": 2722 + }, + { + "epoch": 1.3454837513900901, + "grad_norm": 1.3024836233276083, + "learning_rate": 5.316911321574799e-06, + "loss": 0.2402106523513794, + "step": 2723 + }, + { + "epoch": 1.3459780056839243, + "grad_norm": 1.252933113923405, + "learning_rate": 5.309693637780979e-06, + "loss": 0.22524669766426086, + "step": 2724 + }, + { + "epoch": 1.3464722599777585, + "grad_norm": 1.3140972939485838, + "learning_rate": 5.302479085168668e-06, + "loss": 0.25381600856781006, + "step": 2725 + }, + { + "epoch": 
1.3469665142715928, + "grad_norm": 1.2857997911307526, + "learning_rate": 5.295267668554202e-06, + "loss": 0.2614738643169403, + "step": 2726 + }, + { + "epoch": 1.347460768565427, + "grad_norm": 8.575818718402259, + "learning_rate": 5.288059392751817e-06, + "loss": 0.2701472043991089, + "step": 2727 + }, + { + "epoch": 1.3479550228592612, + "grad_norm": 1.378318405059408, + "learning_rate": 5.280854262573661e-06, + "loss": 0.2788996696472168, + "step": 2728 + }, + { + "epoch": 1.3484492771530951, + "grad_norm": 1.2759693341337726, + "learning_rate": 5.273652282829764e-06, + "loss": 0.2419927418231964, + "step": 2729 + }, + { + "epoch": 1.3489435314469294, + "grad_norm": 1.4943656047554885, + "learning_rate": 5.266453458328071e-06, + "loss": 0.26454097032546997, + "step": 2730 + }, + { + "epoch": 1.3494377857407636, + "grad_norm": 1.3109211241308218, + "learning_rate": 5.259257793874421e-06, + "loss": 0.24090510606765747, + "step": 2731 + }, + { + "epoch": 1.3499320400345978, + "grad_norm": 1.3390086912520884, + "learning_rate": 5.252065294272528e-06, + "loss": 0.27343428134918213, + "step": 2732 + }, + { + "epoch": 1.350426294328432, + "grad_norm": 1.3272957509132868, + "learning_rate": 5.244875964324005e-06, + "loss": 0.2623448967933655, + "step": 2733 + }, + { + "epoch": 1.3509205486222662, + "grad_norm": 1.2273005978142049, + "learning_rate": 5.237689808828346e-06, + "loss": 0.22721052169799805, + "step": 2734 + }, + { + "epoch": 1.3514148029161004, + "grad_norm": 1.4111267721919942, + "learning_rate": 5.230506832582924e-06, + "loss": 0.26385387778282166, + "step": 2735 + }, + { + "epoch": 1.3519090572099346, + "grad_norm": 1.4309565613654673, + "learning_rate": 5.223327040382995e-06, + "loss": 0.2679533064365387, + "step": 2736 + }, + { + "epoch": 1.3524033115037688, + "grad_norm": 1.285385576934023, + "learning_rate": 5.2161504370216855e-06, + "loss": 0.25042447447776794, + "step": 2737 + }, + { + "epoch": 1.3528975657976028, + "grad_norm": 
1.3420398780717075, + "learning_rate": 5.2089770272899845e-06, + "loss": 0.22735297679901123, + "step": 2738 + }, + { + "epoch": 1.353391820091437, + "grad_norm": 1.2715261749804811, + "learning_rate": 5.201806815976772e-06, + "loss": 0.25517284870147705, + "step": 2739 + }, + { + "epoch": 1.3538860743852712, + "grad_norm": 1.4834789867138143, + "learning_rate": 5.194639807868767e-06, + "loss": 0.2942652702331543, + "step": 2740 + }, + { + "epoch": 1.3543803286791054, + "grad_norm": 1.2535180106339032, + "learning_rate": 5.187476007750567e-06, + "loss": 0.2605661153793335, + "step": 2741 + }, + { + "epoch": 1.3548745829729396, + "grad_norm": 1.34702814682356, + "learning_rate": 5.1803154204046215e-06, + "loss": 0.22976648807525635, + "step": 2742 + }, + { + "epoch": 1.3553688372667738, + "grad_norm": 1.2786328684416228, + "learning_rate": 5.173158050611236e-06, + "loss": 0.24301470816135406, + "step": 2743 + }, + { + "epoch": 1.3558630915606078, + "grad_norm": 1.3509518199555386, + "learning_rate": 5.166003903148568e-06, + "loss": 0.2714199125766754, + "step": 2744 + }, + { + "epoch": 1.356357345854442, + "grad_norm": 1.4130809131188478, + "learning_rate": 5.15885298279263e-06, + "loss": 0.27004045248031616, + "step": 2745 + }, + { + "epoch": 1.3568516001482762, + "grad_norm": 1.1866112739948385, + "learning_rate": 5.151705294317262e-06, + "loss": 0.2062053680419922, + "step": 2746 + }, + { + "epoch": 1.3573458544421104, + "grad_norm": 1.3476275860643891, + "learning_rate": 5.144560842494168e-06, + "loss": 0.2589803636074066, + "step": 2747 + }, + { + "epoch": 1.3578401087359446, + "grad_norm": 1.4207662826517113, + "learning_rate": 5.137419632092886e-06, + "loss": 0.26469242572784424, + "step": 2748 + }, + { + "epoch": 1.3583343630297788, + "grad_norm": 1.217607994018294, + "learning_rate": 5.130281667880774e-06, + "loss": 0.26241326332092285, + "step": 2749 + }, + { + "epoch": 1.358828617323613, + "grad_norm": 1.375829317891462, + "learning_rate": 
5.123146954623038e-06, + "loss": 0.2674810290336609, + "step": 2750 + }, + { + "epoch": 1.3593228716174472, + "grad_norm": 1.3872924823998294, + "learning_rate": 5.116015497082719e-06, + "loss": 0.23186063766479492, + "step": 2751 + }, + { + "epoch": 1.3598171259112815, + "grad_norm": 1.3207469475464653, + "learning_rate": 5.108887300020669e-06, + "loss": 0.2794165313243866, + "step": 2752 + }, + { + "epoch": 1.3603113802051157, + "grad_norm": 1.2682065300683938, + "learning_rate": 5.1017623681955705e-06, + "loss": 0.25263023376464844, + "step": 2753 + }, + { + "epoch": 1.3608056344989496, + "grad_norm": 1.385223404499901, + "learning_rate": 5.0946407063639315e-06, + "loss": 0.2503500282764435, + "step": 2754 + }, + { + "epoch": 1.3612998887927839, + "grad_norm": 1.1490078969357793, + "learning_rate": 5.087522319280061e-06, + "loss": 0.21871569752693176, + "step": 2755 + }, + { + "epoch": 1.361794143086618, + "grad_norm": 1.3919853358310244, + "learning_rate": 5.080407211696103e-06, + "loss": 0.2790142893791199, + "step": 2756 + }, + { + "epoch": 1.3622883973804523, + "grad_norm": 1.3837841689522787, + "learning_rate": 5.073295388362003e-06, + "loss": 0.27197304368019104, + "step": 2757 + }, + { + "epoch": 1.3627826516742865, + "grad_norm": 1.3248855835987599, + "learning_rate": 5.066186854025502e-06, + "loss": 0.2402152568101883, + "step": 2758 + }, + { + "epoch": 1.3632769059681207, + "grad_norm": 1.3193984824612894, + "learning_rate": 5.059081613432162e-06, + "loss": 0.24418887495994568, + "step": 2759 + }, + { + "epoch": 1.3637711602619547, + "grad_norm": 1.1840901033348532, + "learning_rate": 5.05197967132534e-06, + "loss": 0.2239491045475006, + "step": 2760 + }, + { + "epoch": 1.3642654145557889, + "grad_norm": 1.3401183348354848, + "learning_rate": 5.044881032446192e-06, + "loss": 0.25177091360092163, + "step": 2761 + }, + { + "epoch": 1.364759668849623, + "grad_norm": 1.2524679914953787, + "learning_rate": 5.0377857015336655e-06, + "loss": 
0.25462138652801514, + "step": 2762 + }, + { + "epoch": 1.3652539231434573, + "grad_norm": 1.154660335850044, + "learning_rate": 5.0306936833245034e-06, + "loss": 0.21030092239379883, + "step": 2763 + }, + { + "epoch": 1.3657481774372915, + "grad_norm": 1.2778480955324765, + "learning_rate": 5.0236049825532355e-06, + "loss": 0.24033348262310028, + "step": 2764 + }, + { + "epoch": 1.3662424317311257, + "grad_norm": 1.2874693424331807, + "learning_rate": 5.016519603952177e-06, + "loss": 0.20803815126419067, + "step": 2765 + }, + { + "epoch": 1.36673668602496, + "grad_norm": 1.3360777408248645, + "learning_rate": 5.00943755225143e-06, + "loss": 0.21589599549770355, + "step": 2766 + }, + { + "epoch": 1.367230940318794, + "grad_norm": 1.3112690340132882, + "learning_rate": 5.00235883217886e-06, + "loss": 0.2690975069999695, + "step": 2767 + }, + { + "epoch": 1.3677251946126283, + "grad_norm": 1.395793399890879, + "learning_rate": 4.995283448460131e-06, + "loss": 0.2368423044681549, + "step": 2768 + }, + { + "epoch": 1.3682194489064623, + "grad_norm": 1.428306560095472, + "learning_rate": 4.988211405818661e-06, + "loss": 0.2801262140274048, + "step": 2769 + }, + { + "epoch": 1.3687137032002965, + "grad_norm": 1.4209027545437471, + "learning_rate": 4.981142708975647e-06, + "loss": 0.2777586877346039, + "step": 2770 + }, + { + "epoch": 1.3692079574941307, + "grad_norm": 1.1921679323806382, + "learning_rate": 4.97407736265005e-06, + "loss": 0.2400980144739151, + "step": 2771 + }, + { + "epoch": 1.369702211787965, + "grad_norm": 1.233538906022963, + "learning_rate": 4.967015371558592e-06, + "loss": 0.2513861358165741, + "step": 2772 + }, + { + "epoch": 1.3701964660817991, + "grad_norm": 1.2944813845771217, + "learning_rate": 4.959956740415761e-06, + "loss": 0.2785816490650177, + "step": 2773 + }, + { + "epoch": 1.3706907203756333, + "grad_norm": 1.456856079389265, + "learning_rate": 4.9529014739338e-06, + "loss": 0.29092347621917725, + "step": 2774 + }, + { + "epoch": 
1.3711849746694673, + "grad_norm": 1.3133832748237033, + "learning_rate": 4.945849576822693e-06, + "loss": 0.27067384123802185, + "step": 2775 + }, + { + "epoch": 1.3716792289633015, + "grad_norm": 1.3000530351478699, + "learning_rate": 4.938801053790199e-06, + "loss": 0.21500205993652344, + "step": 2776 + }, + { + "epoch": 1.3721734832571357, + "grad_norm": 1.2838621226635265, + "learning_rate": 4.931755909541808e-06, + "loss": 0.2422936111688614, + "step": 2777 + }, + { + "epoch": 1.37266773755097, + "grad_norm": 1.3694112071584477, + "learning_rate": 4.9247141487807515e-06, + "loss": 0.2760060727596283, + "step": 2778 + }, + { + "epoch": 1.3731619918448041, + "grad_norm": 1.39746625445185, + "learning_rate": 4.917675776208013e-06, + "loss": 0.22626326978206635, + "step": 2779 + }, + { + "epoch": 1.3736562461386383, + "grad_norm": 1.34096746485375, + "learning_rate": 4.910640796522308e-06, + "loss": 0.23023411631584167, + "step": 2780 + }, + { + "epoch": 1.3741505004324726, + "grad_norm": 1.29137003736815, + "learning_rate": 4.903609214420088e-06, + "loss": 0.22157053649425507, + "step": 2781 + }, + { + "epoch": 1.3746447547263068, + "grad_norm": 1.1801851543310786, + "learning_rate": 4.89658103459554e-06, + "loss": 0.24125584959983826, + "step": 2782 + }, + { + "epoch": 1.375139009020141, + "grad_norm": 1.3517508821088553, + "learning_rate": 4.889556261740578e-06, + "loss": 0.26294079422950745, + "step": 2783 + }, + { + "epoch": 1.3756332633139752, + "grad_norm": 1.2726719724151299, + "learning_rate": 4.882534900544829e-06, + "loss": 0.25327497720718384, + "step": 2784 + }, + { + "epoch": 1.3761275176078092, + "grad_norm": 1.2868199846308948, + "learning_rate": 4.875516955695663e-06, + "loss": 0.2716723084449768, + "step": 2785 + }, + { + "epoch": 1.3766217719016434, + "grad_norm": 1.4619117882899046, + "learning_rate": 4.8685024318781615e-06, + "loss": 0.2889532446861267, + "step": 2786 + }, + { + "epoch": 1.3771160261954776, + "grad_norm": 1.2622088454697893, 
+ "learning_rate": 4.861491333775114e-06, + "loss": 0.23743030428886414, + "step": 2787 + }, + { + "epoch": 1.3776102804893118, + "grad_norm": 1.2912517641324606, + "learning_rate": 4.8544836660670305e-06, + "loss": 0.27180567383766174, + "step": 2788 + }, + { + "epoch": 1.378104534783146, + "grad_norm": 1.3376004646586275, + "learning_rate": 4.847479433432131e-06, + "loss": 0.2549944221973419, + "step": 2789 + }, + { + "epoch": 1.37859878907698, + "grad_norm": 1.189305404121555, + "learning_rate": 4.8404786405463414e-06, + "loss": 0.24112319946289062, + "step": 2790 + }, + { + "epoch": 1.3790930433708142, + "grad_norm": 1.1833978049698726, + "learning_rate": 4.833481292083291e-06, + "loss": 0.22865869104862213, + "step": 2791 + }, + { + "epoch": 1.3795872976646484, + "grad_norm": 1.268697923498799, + "learning_rate": 4.82648739271431e-06, + "loss": 0.24851003289222717, + "step": 2792 + }, + { + "epoch": 1.3800815519584826, + "grad_norm": 1.2931223721765053, + "learning_rate": 4.819496947108424e-06, + "loss": 0.251456081867218, + "step": 2793 + }, + { + "epoch": 1.3805758062523168, + "grad_norm": 1.4758961733623657, + "learning_rate": 4.81250995993236e-06, + "loss": 0.31711041927337646, + "step": 2794 + }, + { + "epoch": 1.381070060546151, + "grad_norm": 1.3291779254725478, + "learning_rate": 4.805526435850523e-06, + "loss": 0.2204340100288391, + "step": 2795 + }, + { + "epoch": 1.3815643148399852, + "grad_norm": 1.2784619373678463, + "learning_rate": 4.798546379525013e-06, + "loss": 0.26289406418800354, + "step": 2796 + }, + { + "epoch": 1.3820585691338194, + "grad_norm": 1.28320111492484, + "learning_rate": 4.7915697956156284e-06, + "loss": 0.24830611050128937, + "step": 2797 + }, + { + "epoch": 1.3825528234276536, + "grad_norm": 1.2879657785107324, + "learning_rate": 4.784596688779825e-06, + "loss": 0.24792183935642242, + "step": 2798 + }, + { + "epoch": 1.3830470777214878, + "grad_norm": 1.2696074389245717, + "learning_rate": 4.777627063672753e-06, + "loss": 
0.2689560651779175, + "step": 2799 + }, + { + "epoch": 1.3835413320153218, + "grad_norm": 1.3225545388421776, + "learning_rate": 4.770660924947238e-06, + "loss": 0.24323254823684692, + "step": 2800 + }, + { + "epoch": 1.384035586309156, + "grad_norm": 1.4076671335254063, + "learning_rate": 4.7636982772537645e-06, + "loss": 0.24404528737068176, + "step": 2801 + }, + { + "epoch": 1.3845298406029902, + "grad_norm": 1.203765816908177, + "learning_rate": 4.7567391252405075e-06, + "loss": 0.23512448370456696, + "step": 2802 + }, + { + "epoch": 1.3850240948968244, + "grad_norm": 1.5018331188451308, + "learning_rate": 4.749783473553297e-06, + "loss": 0.26446110010147095, + "step": 2803 + }, + { + "epoch": 1.3855183491906586, + "grad_norm": 1.408580468005289, + "learning_rate": 4.742831326835618e-06, + "loss": 0.24630968272686005, + "step": 2804 + }, + { + "epoch": 1.3860126034844928, + "grad_norm": 1.3358261514200123, + "learning_rate": 4.735882689728628e-06, + "loss": 0.253492146730423, + "step": 2805 + }, + { + "epoch": 1.3865068577783268, + "grad_norm": 1.3501776737603972, + "learning_rate": 4.7289375668711444e-06, + "loss": 0.271090567111969, + "step": 2806 + }, + { + "epoch": 1.387001112072161, + "grad_norm": 1.278147407656648, + "learning_rate": 4.721995962899625e-06, + "loss": 0.24045832455158234, + "step": 2807 + }, + { + "epoch": 1.3874953663659952, + "grad_norm": 1.3482420589650876, + "learning_rate": 4.715057882448187e-06, + "loss": 0.2525935471057892, + "step": 2808 + }, + { + "epoch": 1.3879896206598294, + "grad_norm": 1.6416013674407632, + "learning_rate": 4.708123330148593e-06, + "loss": 0.30852392315864563, + "step": 2809 + }, + { + "epoch": 1.3884838749536637, + "grad_norm": 1.4379358472073636, + "learning_rate": 4.701192310630253e-06, + "loss": 0.2770250737667084, + "step": 2810 + }, + { + "epoch": 1.3889781292474979, + "grad_norm": 1.3872314722590495, + "learning_rate": 4.6942648285202154e-06, + "loss": 0.29135680198669434, + "step": 2811 + }, + { + 
"epoch": 1.389472383541332, + "grad_norm": 1.3561535153102244, + "learning_rate": 4.687340888443171e-06, + "loss": 0.26933860778808594, + "step": 2812 + }, + { + "epoch": 1.3899666378351663, + "grad_norm": 1.3589820356083573, + "learning_rate": 4.680420495021436e-06, + "loss": 0.26089105010032654, + "step": 2813 + }, + { + "epoch": 1.3904608921290005, + "grad_norm": 1.446680212777315, + "learning_rate": 4.673503652874977e-06, + "loss": 0.26031410694122314, + "step": 2814 + }, + { + "epoch": 1.3909551464228345, + "grad_norm": 1.4223445911905375, + "learning_rate": 4.6665903666213685e-06, + "loss": 0.2887076139450073, + "step": 2815 + }, + { + "epoch": 1.3914494007166687, + "grad_norm": 1.4125652827001185, + "learning_rate": 4.6596806408758275e-06, + "loss": 0.2360706925392151, + "step": 2816 + }, + { + "epoch": 1.3919436550105029, + "grad_norm": 1.2857689419175287, + "learning_rate": 4.652774480251186e-06, + "loss": 0.22275522351264954, + "step": 2817 + }, + { + "epoch": 1.392437909304337, + "grad_norm": 1.4433288432295395, + "learning_rate": 4.645871889357899e-06, + "loss": 0.2425977736711502, + "step": 2818 + }, + { + "epoch": 1.3929321635981713, + "grad_norm": 1.3257241152583827, + "learning_rate": 4.638972872804038e-06, + "loss": 0.25219830870628357, + "step": 2819 + }, + { + "epoch": 1.3934264178920055, + "grad_norm": 1.3749035761313395, + "learning_rate": 4.6320774351952916e-06, + "loss": 0.28060346841812134, + "step": 2820 + }, + { + "epoch": 1.3939206721858395, + "grad_norm": 1.2003147708990263, + "learning_rate": 4.625185581134942e-06, + "loss": 0.2395240217447281, + "step": 2821 + }, + { + "epoch": 1.3944149264796737, + "grad_norm": 1.1704641579429333, + "learning_rate": 4.618297315223906e-06, + "loss": 0.23622646927833557, + "step": 2822 + }, + { + "epoch": 1.394909180773508, + "grad_norm": 1.2829625624138312, + "learning_rate": 4.611412642060692e-06, + "loss": 0.2189474105834961, + "step": 2823 + }, + { + "epoch": 1.395403435067342, + "grad_norm": 
1.433264639271618, + "learning_rate": 4.6045315662414e-06, + "loss": 0.266002357006073, + "step": 2824 + }, + { + "epoch": 1.3958976893611763, + "grad_norm": 1.3252437693414834, + "learning_rate": 4.5976540923597425e-06, + "loss": 0.2402176856994629, + "step": 2825 + }, + { + "epoch": 1.3963919436550105, + "grad_norm": 1.359969321526994, + "learning_rate": 4.5907802250070235e-06, + "loss": 0.2493474781513214, + "step": 2826 + }, + { + "epoch": 1.3968861979488447, + "grad_norm": 1.41117190363675, + "learning_rate": 4.583909968772137e-06, + "loss": 0.25716543197631836, + "step": 2827 + }, + { + "epoch": 1.397380452242679, + "grad_norm": 1.2726969842984424, + "learning_rate": 4.57704332824157e-06, + "loss": 0.29470473527908325, + "step": 2828 + }, + { + "epoch": 1.3978747065365131, + "grad_norm": 1.3349562969336177, + "learning_rate": 4.570180307999394e-06, + "loss": 0.28095656633377075, + "step": 2829 + }, + { + "epoch": 1.3983689608303473, + "grad_norm": 1.3296802970374444, + "learning_rate": 4.563320912627256e-06, + "loss": 0.2351825088262558, + "step": 2830 + }, + { + "epoch": 1.3988632151241813, + "grad_norm": 1.378245480597285, + "learning_rate": 4.556465146704399e-06, + "loss": 0.25859856605529785, + "step": 2831 + }, + { + "epoch": 1.3993574694180155, + "grad_norm": 1.3122509634402246, + "learning_rate": 4.549613014807637e-06, + "loss": 0.2503181993961334, + "step": 2832 + }, + { + "epoch": 1.3998517237118497, + "grad_norm": 1.4164889794081637, + "learning_rate": 4.542764521511345e-06, + "loss": 0.26368820667266846, + "step": 2833 + }, + { + "epoch": 1.400345978005684, + "grad_norm": 1.2584462742908673, + "learning_rate": 4.535919671387483e-06, + "loss": 0.24077676236629486, + "step": 2834 + }, + { + "epoch": 1.4008402322995182, + "grad_norm": 1.3906309875331755, + "learning_rate": 4.529078469005577e-06, + "loss": 0.27042093873023987, + "step": 2835 + }, + { + "epoch": 1.4013344865933524, + "grad_norm": 1.3047899471845867, + "learning_rate": 
4.5222409189327155e-06, + "loss": 0.2731306552886963, + "step": 2836 + }, + { + "epoch": 1.4018287408871863, + "grad_norm": 1.293016022457822, + "learning_rate": 4.515407025733548e-06, + "loss": 0.2925037741661072, + "step": 2837 + }, + { + "epoch": 1.4023229951810205, + "grad_norm": 1.3019226114538747, + "learning_rate": 4.508576793970285e-06, + "loss": 0.2927025556564331, + "step": 2838 + }, + { + "epoch": 1.4028172494748548, + "grad_norm": 1.2637397509173496, + "learning_rate": 4.5017502282026926e-06, + "loss": 0.26285338401794434, + "step": 2839 + }, + { + "epoch": 1.403311503768689, + "grad_norm": 1.3147900807622677, + "learning_rate": 4.49492733298809e-06, + "loss": 0.22698873281478882, + "step": 2840 + }, + { + "epoch": 1.4038057580625232, + "grad_norm": 1.3171706155487821, + "learning_rate": 4.488108112881339e-06, + "loss": 0.24116170406341553, + "step": 2841 + }, + { + "epoch": 1.4043000123563574, + "grad_norm": 1.57472275672956, + "learning_rate": 4.481292572434852e-06, + "loss": 0.3211704194545746, + "step": 2842 + }, + { + "epoch": 1.4047942666501916, + "grad_norm": 1.3631722904804857, + "learning_rate": 4.474480716198598e-06, + "loss": 0.26634523272514343, + "step": 2843 + }, + { + "epoch": 1.4052885209440258, + "grad_norm": 1.2801660794508798, + "learning_rate": 4.467672548720066e-06, + "loss": 0.24751242995262146, + "step": 2844 + }, + { + "epoch": 1.40578277523786, + "grad_norm": 1.2023997182117507, + "learning_rate": 4.4608680745442915e-06, + "loss": 0.22031354904174805, + "step": 2845 + }, + { + "epoch": 1.406277029531694, + "grad_norm": 1.4549549871552898, + "learning_rate": 4.454067298213847e-06, + "loss": 0.2474634051322937, + "step": 2846 + }, + { + "epoch": 1.4067712838255282, + "grad_norm": 1.2925543429398942, + "learning_rate": 4.4472702242688315e-06, + "loss": 0.2494845986366272, + "step": 2847 + }, + { + "epoch": 1.4072655381193624, + "grad_norm": 1.246615378915442, + "learning_rate": 4.440476857246876e-06, + "loss": 0.23150494694709778, 
+ "step": 2848 + }, + { + "epoch": 1.4077597924131966, + "grad_norm": 1.3473585855048795, + "learning_rate": 4.433687201683138e-06, + "loss": 0.2093413770198822, + "step": 2849 + }, + { + "epoch": 1.4082540467070308, + "grad_norm": 1.4247715723132508, + "learning_rate": 4.426901262110287e-06, + "loss": 0.26741865277290344, + "step": 2850 + }, + { + "epoch": 1.408748301000865, + "grad_norm": 1.3965732526570211, + "learning_rate": 4.420119043058521e-06, + "loss": 0.2599044740200043, + "step": 2851 + }, + { + "epoch": 1.409242555294699, + "grad_norm": 1.37695062225065, + "learning_rate": 4.413340549055562e-06, + "loss": 0.26934683322906494, + "step": 2852 + }, + { + "epoch": 1.4097368095885332, + "grad_norm": 1.247550824996485, + "learning_rate": 4.4065657846266255e-06, + "loss": 0.2609720528125763, + "step": 2853 + }, + { + "epoch": 1.4102310638823674, + "grad_norm": 1.3034094501092508, + "learning_rate": 4.39979475429445e-06, + "loss": 0.23431813716888428, + "step": 2854 + }, + { + "epoch": 1.4107253181762016, + "grad_norm": 1.5127417165274348, + "learning_rate": 4.39302746257928e-06, + "loss": 0.2791878581047058, + "step": 2855 + }, + { + "epoch": 1.4112195724700358, + "grad_norm": 1.445393105302077, + "learning_rate": 4.386263913998862e-06, + "loss": 0.30482247471809387, + "step": 2856 + }, + { + "epoch": 1.41171382676387, + "grad_norm": 1.517774336378155, + "learning_rate": 4.379504113068445e-06, + "loss": 0.24561305344104767, + "step": 2857 + }, + { + "epoch": 1.4122080810577042, + "grad_norm": 1.2686201180133903, + "learning_rate": 4.372748064300777e-06, + "loss": 0.23973286151885986, + "step": 2858 + }, + { + "epoch": 1.4127023353515384, + "grad_norm": 1.2884315615066577, + "learning_rate": 4.365995772206092e-06, + "loss": 0.26788556575775146, + "step": 2859 + }, + { + "epoch": 1.4131965896453726, + "grad_norm": 1.2479985472864645, + "learning_rate": 4.359247241292136e-06, + "loss": 0.22432288527488708, + "step": 2860 + }, + { + "epoch": 1.4136908439392069, + 
"grad_norm": 1.4071442664764462, + "learning_rate": 4.352502476064121e-06, + "loss": 0.282687783241272, + "step": 2861 + }, + { + "epoch": 1.4141850982330408, + "grad_norm": 1.350175603929749, + "learning_rate": 4.345761481024761e-06, + "loss": 0.2516692578792572, + "step": 2862 + }, + { + "epoch": 1.414679352526875, + "grad_norm": 1.3813903906983658, + "learning_rate": 4.3390242606742465e-06, + "loss": 0.2473583221435547, + "step": 2863 + }, + { + "epoch": 1.4151736068207093, + "grad_norm": 1.365125849897862, + "learning_rate": 4.33229081951025e-06, + "loss": 0.24372908473014832, + "step": 2864 + }, + { + "epoch": 1.4156678611145435, + "grad_norm": 1.935117633937839, + "learning_rate": 4.325561162027922e-06, + "loss": 0.2877897024154663, + "step": 2865 + }, + { + "epoch": 1.4161621154083777, + "grad_norm": 1.3789670558806315, + "learning_rate": 4.318835292719886e-06, + "loss": 0.2554720342159271, + "step": 2866 + }, + { + "epoch": 1.4166563697022119, + "grad_norm": 1.400243578908533, + "learning_rate": 4.312113216076228e-06, + "loss": 0.26695260405540466, + "step": 2867 + }, + { + "epoch": 1.4171506239960459, + "grad_norm": 1.310264039945657, + "learning_rate": 4.305394936584522e-06, + "loss": 0.26983851194381714, + "step": 2868 + }, + { + "epoch": 1.41764487828988, + "grad_norm": 1.4664847959785403, + "learning_rate": 4.298680458729793e-06, + "loss": 0.303170382976532, + "step": 2869 + }, + { + "epoch": 1.4181391325837143, + "grad_norm": 1.2870012899484584, + "learning_rate": 4.2919697869945234e-06, + "loss": 0.23217584192752838, + "step": 2870 + }, + { + "epoch": 1.4186333868775485, + "grad_norm": 1.3723703910904035, + "learning_rate": 4.285262925858663e-06, + "loss": 0.2895517349243164, + "step": 2871 + }, + { + "epoch": 1.4191276411713827, + "grad_norm": 1.3083324921698822, + "learning_rate": 4.278559879799628e-06, + "loss": 0.24025630950927734, + "step": 2872 + }, + { + "epoch": 1.4196218954652169, + "grad_norm": 1.2827271091784578, + "learning_rate": 
4.271860653292263e-06, + "loss": 0.22810839116573334, + "step": 2873 + }, + { + "epoch": 1.420116149759051, + "grad_norm": 1.3806208017840322, + "learning_rate": 4.26516525080888e-06, + "loss": 0.266724169254303, + "step": 2874 + }, + { + "epoch": 1.4206104040528853, + "grad_norm": 1.225057219675358, + "learning_rate": 4.25847367681924e-06, + "loss": 0.22618745267391205, + "step": 2875 + }, + { + "epoch": 1.4211046583467195, + "grad_norm": 1.2369737958102245, + "learning_rate": 4.251785935790529e-06, + "loss": 0.2239789217710495, + "step": 2876 + }, + { + "epoch": 1.4215989126405535, + "grad_norm": 1.4266723106614325, + "learning_rate": 4.245102032187399e-06, + "loss": 0.21519358456134796, + "step": 2877 + }, + { + "epoch": 1.4220931669343877, + "grad_norm": 1.3543349519259755, + "learning_rate": 4.2384219704719284e-06, + "loss": 0.31226712465286255, + "step": 2878 + }, + { + "epoch": 1.422587421228222, + "grad_norm": 1.56763311196269, + "learning_rate": 4.231745755103625e-06, + "loss": 0.26814836263656616, + "step": 2879 + }, + { + "epoch": 1.423081675522056, + "grad_norm": 1.340943129837897, + "learning_rate": 4.225073390539436e-06, + "loss": 0.2369621843099594, + "step": 2880 + }, + { + "epoch": 1.4235759298158903, + "grad_norm": 1.4174455321042607, + "learning_rate": 4.218404881233737e-06, + "loss": 0.2556746304035187, + "step": 2881 + }, + { + "epoch": 1.4240701841097245, + "grad_norm": 1.4008574237374047, + "learning_rate": 4.2117402316383314e-06, + "loss": 0.25875598192214966, + "step": 2882 + }, + { + "epoch": 1.4245644384035585, + "grad_norm": 1.3837412182941131, + "learning_rate": 4.205079446202443e-06, + "loss": 0.26839762926101685, + "step": 2883 + }, + { + "epoch": 1.4250586926973927, + "grad_norm": 1.3404796422391116, + "learning_rate": 4.198422529372717e-06, + "loss": 0.2764383554458618, + "step": 2884 + }, + { + "epoch": 1.425552946991227, + "grad_norm": 1.6233600341280843, + "learning_rate": 4.191769485593216e-06, + "loss": 0.24517112970352173, + 
"step": 2885 + }, + { + "epoch": 1.4260472012850611, + "grad_norm": 1.2960278491651354, + "learning_rate": 4.18512031930542e-06, + "loss": 0.21880990266799927, + "step": 2886 + }, + { + "epoch": 1.4265414555788953, + "grad_norm": 1.25547495232964, + "learning_rate": 4.178475034948212e-06, + "loss": 0.24671246111392975, + "step": 2887 + }, + { + "epoch": 1.4270357098727295, + "grad_norm": 1.3321806455697769, + "learning_rate": 4.171833636957886e-06, + "loss": 0.25473371148109436, + "step": 2888 + }, + { + "epoch": 1.4275299641665637, + "grad_norm": 1.2832708163920512, + "learning_rate": 4.1651961297681574e-06, + "loss": 0.2675618529319763, + "step": 2889 + }, + { + "epoch": 1.428024218460398, + "grad_norm": 1.361777795281808, + "learning_rate": 4.15856251781012e-06, + "loss": 0.24357986450195312, + "step": 2890 + }, + { + "epoch": 1.4285184727542322, + "grad_norm": 1.360475333723739, + "learning_rate": 4.1519328055122825e-06, + "loss": 0.2668409049510956, + "step": 2891 + }, + { + "epoch": 1.4290127270480664, + "grad_norm": 1.237397304360782, + "learning_rate": 4.145306997300543e-06, + "loss": 0.24507637321949005, + "step": 2892 + }, + { + "epoch": 1.4295069813419004, + "grad_norm": 1.366253286129835, + "learning_rate": 4.1386850975982e-06, + "loss": 0.2791709899902344, + "step": 2893 + }, + { + "epoch": 1.4300012356357346, + "grad_norm": 1.2339989570889298, + "learning_rate": 4.132067110825939e-06, + "loss": 0.24982133507728577, + "step": 2894 + }, + { + "epoch": 1.4304954899295688, + "grad_norm": 1.4357848897595227, + "learning_rate": 4.125453041401835e-06, + "loss": 0.2814679741859436, + "step": 2895 + }, + { + "epoch": 1.430989744223403, + "grad_norm": 1.2447298736764703, + "learning_rate": 4.118842893741336e-06, + "loss": 0.22699782252311707, + "step": 2896 + }, + { + "epoch": 1.4314839985172372, + "grad_norm": 1.9366220135779266, + "learning_rate": 4.112236672257294e-06, + "loss": 0.23297230899333954, + "step": 2897 + }, + { + "epoch": 1.4319782528110712, + 
"grad_norm": 1.4169021772429402, + "learning_rate": 4.1056343813599265e-06, + "loss": 0.26085159182548523, + "step": 2898 + }, + { + "epoch": 1.4324725071049054, + "grad_norm": 1.2947699028454482, + "learning_rate": 4.0990360254568216e-06, + "loss": 0.27813559770584106, + "step": 2899 + }, + { + "epoch": 1.4329667613987396, + "grad_norm": 1.4648322974961994, + "learning_rate": 4.092441608952953e-06, + "loss": 0.2821611762046814, + "step": 2900 + }, + { + "epoch": 1.4334610156925738, + "grad_norm": 1.4262304528738896, + "learning_rate": 4.085851136250657e-06, + "loss": 0.25223150849342346, + "step": 2901 + }, + { + "epoch": 1.433955269986408, + "grad_norm": 1.2236760469459784, + "learning_rate": 4.079264611749639e-06, + "loss": 0.225361630320549, + "step": 2902 + }, + { + "epoch": 1.4344495242802422, + "grad_norm": 1.2980114377261416, + "learning_rate": 4.07268203984697e-06, + "loss": 0.2564583420753479, + "step": 2903 + }, + { + "epoch": 1.4349437785740764, + "grad_norm": 1.618238680371033, + "learning_rate": 4.066103424937083e-06, + "loss": 0.2433827817440033, + "step": 2904 + }, + { + "epoch": 1.4354380328679106, + "grad_norm": 1.326779755851318, + "learning_rate": 4.059528771411758e-06, + "loss": 0.26073208451271057, + "step": 2905 + }, + { + "epoch": 1.4359322871617448, + "grad_norm": 1.381783420476221, + "learning_rate": 4.052958083660153e-06, + "loss": 0.2937609553337097, + "step": 2906 + }, + { + "epoch": 1.436426541455579, + "grad_norm": 1.2248682484343931, + "learning_rate": 4.046391366068756e-06, + "loss": 0.22026552259922028, + "step": 2907 + }, + { + "epoch": 1.436920795749413, + "grad_norm": 1.2471555303405935, + "learning_rate": 4.039828623021415e-06, + "loss": 0.21137471497058868, + "step": 2908 + }, + { + "epoch": 1.4374150500432472, + "grad_norm": 1.316365476590171, + "learning_rate": 4.033269858899324e-06, + "loss": 0.23597699403762817, + "step": 2909 + }, + { + "epoch": 1.4379093043370814, + "grad_norm": 1.3166979356724768, + "learning_rate": 
4.026715078081023e-06, + "loss": 0.2667025923728943, + "step": 2910 + }, + { + "epoch": 1.4384035586309156, + "grad_norm": 1.2942746954451143, + "learning_rate": 4.020164284942387e-06, + "loss": 0.2789616584777832, + "step": 2911 + }, + { + "epoch": 1.4388978129247498, + "grad_norm": 1.2105601579452838, + "learning_rate": 4.013617483856637e-06, + "loss": 0.23176617920398712, + "step": 2912 + }, + { + "epoch": 1.439392067218584, + "grad_norm": 1.3989428986083243, + "learning_rate": 4.007074679194313e-06, + "loss": 0.2814248204231262, + "step": 2913 + }, + { + "epoch": 1.439886321512418, + "grad_norm": 1.7399518805726892, + "learning_rate": 4.000535875323307e-06, + "loss": 0.26201730966567993, + "step": 2914 + }, + { + "epoch": 1.4403805758062522, + "grad_norm": 1.3752450122135709, + "learning_rate": 3.994001076608833e-06, + "loss": 0.22517681121826172, + "step": 2915 + }, + { + "epoch": 1.4408748301000864, + "grad_norm": 1.2576751634156127, + "learning_rate": 3.9874702874134205e-06, + "loss": 0.25220564007759094, + "step": 2916 + }, + { + "epoch": 1.4413690843939206, + "grad_norm": 1.3128506030513347, + "learning_rate": 3.980943512096934e-06, + "loss": 0.23441332578659058, + "step": 2917 + }, + { + "epoch": 1.4418633386877548, + "grad_norm": 1.1616125895518352, + "learning_rate": 3.9744207550165625e-06, + "loss": 0.21659764647483826, + "step": 2918 + }, + { + "epoch": 1.442357592981589, + "grad_norm": 1.3726974417027011, + "learning_rate": 3.967902020526797e-06, + "loss": 0.21888667345046997, + "step": 2919 + }, + { + "epoch": 1.4428518472754233, + "grad_norm": 2.445936326011648, + "learning_rate": 3.961387312979454e-06, + "loss": 0.2771157920360565, + "step": 2920 + }, + { + "epoch": 1.4433461015692575, + "grad_norm": 1.312047281106489, + "learning_rate": 3.9548766367236605e-06, + "loss": 0.21376901865005493, + "step": 2921 + }, + { + "epoch": 1.4438403558630917, + "grad_norm": 1.4472763394283668, + "learning_rate": 3.948369996105849e-06, + "loss": 
0.2888128161430359, + "step": 2922 + }, + { + "epoch": 1.4443346101569257, + "grad_norm": 1.327788891714265, + "learning_rate": 3.941867395469761e-06, + "loss": 0.27809786796569824, + "step": 2923 + }, + { + "epoch": 1.4448288644507599, + "grad_norm": 1.377899507369851, + "learning_rate": 3.935368839156443e-06, + "loss": 0.2573625445365906, + "step": 2924 + }, + { + "epoch": 1.445323118744594, + "grad_norm": 1.5375959387987326, + "learning_rate": 3.928874331504232e-06, + "loss": 0.21472841501235962, + "step": 2925 + }, + { + "epoch": 1.4458173730384283, + "grad_norm": 1.2616393731465387, + "learning_rate": 3.922383876848771e-06, + "loss": 0.23214091360569, + "step": 2926 + }, + { + "epoch": 1.4463116273322625, + "grad_norm": 1.2717196020996628, + "learning_rate": 3.915897479522995e-06, + "loss": 0.23830139636993408, + "step": 2927 + }, + { + "epoch": 1.4468058816260967, + "grad_norm": 1.306053937449173, + "learning_rate": 3.909415143857132e-06, + "loss": 0.2519805431365967, + "step": 2928 + }, + { + "epoch": 1.4473001359199307, + "grad_norm": 1.3548983452054761, + "learning_rate": 3.9029368741786935e-06, + "loss": 0.2191445231437683, + "step": 2929 + }, + { + "epoch": 1.4477943902137649, + "grad_norm": 1.2448486288410623, + "learning_rate": 3.896462674812482e-06, + "loss": 0.2267228364944458, + "step": 2930 + }, + { + "epoch": 1.448288644507599, + "grad_norm": 1.3302096442776044, + "learning_rate": 3.88999255008058e-06, + "loss": 0.26456522941589355, + "step": 2931 + }, + { + "epoch": 1.4487828988014333, + "grad_norm": 1.3729869343228434, + "learning_rate": 3.883526504302353e-06, + "loss": 0.25602713227272034, + "step": 2932 + }, + { + "epoch": 1.4492771530952675, + "grad_norm": 1.9847312680384686, + "learning_rate": 3.877064541794435e-06, + "loss": 0.2545332610607147, + "step": 2933 + }, + { + "epoch": 1.4497714073891017, + "grad_norm": 1.3785644388388194, + "learning_rate": 3.87060666687074e-06, + "loss": 0.2846388816833496, + "step": 2934 + }, + { + "epoch": 
1.450265661682936, + "grad_norm": 1.4353094721790403, + "learning_rate": 3.864152883842461e-06, + "loss": 0.2686496376991272, + "step": 2935 + }, + { + "epoch": 1.4507599159767701, + "grad_norm": 1.2943779410551872, + "learning_rate": 3.857703197018044e-06, + "loss": 0.2712322473526001, + "step": 2936 + }, + { + "epoch": 1.4512541702706043, + "grad_norm": 1.3542096863749147, + "learning_rate": 3.851257610703209e-06, + "loss": 0.23492589592933655, + "step": 2937 + }, + { + "epoch": 1.4517484245644385, + "grad_norm": 1.2747230322582852, + "learning_rate": 3.84481612920094e-06, + "loss": 0.274332731962204, + "step": 2938 + }, + { + "epoch": 1.4522426788582725, + "grad_norm": 1.4107112786506069, + "learning_rate": 3.838378756811475e-06, + "loss": 0.250995010137558, + "step": 2939 + }, + { + "epoch": 1.4527369331521067, + "grad_norm": 1.3749429977256393, + "learning_rate": 3.831945497832313e-06, + "loss": 0.25221261382102966, + "step": 2940 + }, + { + "epoch": 1.453231187445941, + "grad_norm": 1.4826415922959744, + "learning_rate": 3.825516356558211e-06, + "loss": 0.2549906075000763, + "step": 2941 + }, + { + "epoch": 1.4537254417397751, + "grad_norm": 1.296751596925164, + "learning_rate": 3.819091337281158e-06, + "loss": 0.2369248867034912, + "step": 2942 + }, + { + "epoch": 1.4542196960336093, + "grad_norm": 1.3057816538242708, + "learning_rate": 3.8126704442904182e-06, + "loss": 0.23681433498859406, + "step": 2943 + }, + { + "epoch": 1.4547139503274436, + "grad_norm": 1.237019268284654, + "learning_rate": 3.806253681872486e-06, + "loss": 0.24966523051261902, + "step": 2944 + }, + { + "epoch": 1.4552082046212775, + "grad_norm": 1.4768369352256168, + "learning_rate": 3.7998410543110954e-06, + "loss": 0.28130626678466797, + "step": 2945 + }, + { + "epoch": 1.4557024589151117, + "grad_norm": 1.3443210173277784, + "learning_rate": 3.7934325658872275e-06, + "loss": 0.2725732922554016, + "step": 2946 + }, + { + "epoch": 1.456196713208946, + "grad_norm": 1.3345618379823432, 
+ "learning_rate": 3.7870282208790976e-06, + "loss": 0.23695361614227295, + "step": 2947 + }, + { + "epoch": 1.4566909675027802, + "grad_norm": 1.3094683367768178, + "learning_rate": 3.780628023562154e-06, + "loss": 0.2556610405445099, + "step": 2948 + }, + { + "epoch": 1.4571852217966144, + "grad_norm": 1.29841880424943, + "learning_rate": 3.7742319782090786e-06, + "loss": 0.26012274622917175, + "step": 2949 + }, + { + "epoch": 1.4576794760904486, + "grad_norm": 1.4612114957138427, + "learning_rate": 3.7678400890897827e-06, + "loss": 0.23788896203041077, + "step": 2950 + }, + { + "epoch": 1.4581737303842828, + "grad_norm": 1.4390155766896275, + "learning_rate": 3.7614523604713894e-06, + "loss": 0.2927572727203369, + "step": 2951 + }, + { + "epoch": 1.458667984678117, + "grad_norm": 1.2435143086118214, + "learning_rate": 3.75506879661827e-06, + "loss": 0.2254970222711563, + "step": 2952 + }, + { + "epoch": 1.4591622389719512, + "grad_norm": 1.2816222898303182, + "learning_rate": 3.7486894017919883e-06, + "loss": 0.216854065656662, + "step": 2953 + }, + { + "epoch": 1.4596564932657852, + "grad_norm": 1.1833481657982283, + "learning_rate": 3.7423141802513417e-06, + "loss": 0.2505137026309967, + "step": 2954 + }, + { + "epoch": 1.4601507475596194, + "grad_norm": 1.2187582021965486, + "learning_rate": 3.735943136252337e-06, + "loss": 0.19780108332633972, + "step": 2955 + }, + { + "epoch": 1.4606450018534536, + "grad_norm": 1.482633837182769, + "learning_rate": 3.7295762740481923e-06, + "loss": 0.26869216561317444, + "step": 2956 + }, + { + "epoch": 1.4611392561472878, + "grad_norm": 1.4121232274028632, + "learning_rate": 3.7232135978893336e-06, + "loss": 0.28265517950057983, + "step": 2957 + }, + { + "epoch": 1.461633510441122, + "grad_norm": 1.268342410891318, + "learning_rate": 3.7168551120233965e-06, + "loss": 0.2381918877363205, + "step": 2958 + }, + { + "epoch": 1.4621277647349562, + "grad_norm": 1.3343795310746396, + "learning_rate": 3.710500820695203e-06, + 
"loss": 0.27194735407829285, + "step": 2959 + }, + { + "epoch": 1.4626220190287902, + "grad_norm": 1.419071318428777, + "learning_rate": 3.7041507281468e-06, + "loss": 0.2611599266529083, + "step": 2960 + }, + { + "epoch": 1.4631162733226244, + "grad_norm": 1.3417831313824735, + "learning_rate": 3.697804838617418e-06, + "loss": 0.2970972955226898, + "step": 2961 + }, + { + "epoch": 1.4636105276164586, + "grad_norm": 1.3986503652920064, + "learning_rate": 3.6914631563434743e-06, + "loss": 0.24313557147979736, + "step": 2962 + }, + { + "epoch": 1.4641047819102928, + "grad_norm": 1.21693161859368, + "learning_rate": 3.685125685558587e-06, + "loss": 0.23243792355060577, + "step": 2963 + }, + { + "epoch": 1.464599036204127, + "grad_norm": 1.384655578733909, + "learning_rate": 3.6787924304935696e-06, + "loss": 0.2850711941719055, + "step": 2964 + }, + { + "epoch": 1.4650932904979612, + "grad_norm": 1.2938153090671698, + "learning_rate": 3.6724633953764023e-06, + "loss": 0.26217392086982727, + "step": 2965 + }, + { + "epoch": 1.4655875447917954, + "grad_norm": 1.3004956100522334, + "learning_rate": 3.666138584432264e-06, + "loss": 0.24623268842697144, + "step": 2966 + }, + { + "epoch": 1.4660817990856296, + "grad_norm": 1.2765502382143128, + "learning_rate": 3.6598180018835063e-06, + "loss": 0.25010040402412415, + "step": 2967 + }, + { + "epoch": 1.4665760533794638, + "grad_norm": 1.2806642930208934, + "learning_rate": 3.6535016519496603e-06, + "loss": 0.24471378326416016, + "step": 2968 + }, + { + "epoch": 1.467070307673298, + "grad_norm": 1.4411992818002375, + "learning_rate": 3.6471895388474323e-06, + "loss": 0.2845621109008789, + "step": 2969 + }, + { + "epoch": 1.467564561967132, + "grad_norm": 1.394997312403621, + "learning_rate": 3.640881666790699e-06, + "loss": 0.26768919825553894, + "step": 2970 + }, + { + "epoch": 1.4680588162609662, + "grad_norm": 1.3707198305280583, + "learning_rate": 3.6345780399904983e-06, + "loss": 0.27386170625686646, + "step": 2971 + }, + 
{ + "epoch": 1.4685530705548004, + "grad_norm": 1.2413908046529407, + "learning_rate": 3.628278662655055e-06, + "loss": 0.259655237197876, + "step": 2972 + }, + { + "epoch": 1.4690473248486347, + "grad_norm": 1.2328404027424946, + "learning_rate": 3.6219835389897305e-06, + "loss": 0.2234620749950409, + "step": 2973 + }, + { + "epoch": 1.4695415791424689, + "grad_norm": 1.2170225214049992, + "learning_rate": 3.6156926731970664e-06, + "loss": 0.25133174657821655, + "step": 2974 + }, + { + "epoch": 1.4700358334363028, + "grad_norm": 1.4753631122763826, + "learning_rate": 3.609406069476752e-06, + "loss": 0.2856005132198334, + "step": 2975 + }, + { + "epoch": 1.470530087730137, + "grad_norm": 1.352763052735898, + "learning_rate": 3.603123732025635e-06, + "loss": 0.23760217428207397, + "step": 2976 + }, + { + "epoch": 1.4710243420239713, + "grad_norm": 1.315945468844056, + "learning_rate": 3.596845665037715e-06, + "loss": 0.2344968169927597, + "step": 2977 + }, + { + "epoch": 1.4715185963178055, + "grad_norm": 1.3513242562279373, + "learning_rate": 3.5905718727041415e-06, + "loss": 0.23936885595321655, + "step": 2978 + }, + { + "epoch": 1.4720128506116397, + "grad_norm": 1.2281537442777626, + "learning_rate": 3.584302359213204e-06, + "loss": 0.24542436003684998, + "step": 2979 + }, + { + "epoch": 1.4725071049054739, + "grad_norm": 1.2816242991916544, + "learning_rate": 3.578037128750338e-06, + "loss": 0.24754226207733154, + "step": 2980 + }, + { + "epoch": 1.473001359199308, + "grad_norm": 1.3406109779820896, + "learning_rate": 3.5717761854981335e-06, + "loss": 0.25167495012283325, + "step": 2981 + }, + { + "epoch": 1.4734956134931423, + "grad_norm": 1.2820406301810907, + "learning_rate": 3.565519533636296e-06, + "loss": 0.21352116763591766, + "step": 2982 + }, + { + "epoch": 1.4739898677869765, + "grad_norm": 1.5800404779419173, + "learning_rate": 3.5592671773416798e-06, + "loss": 0.24721838533878326, + "step": 2983 + }, + { + "epoch": 1.4744841220808107, + "grad_norm": 
1.209332122723965, + "learning_rate": 3.5530191207882705e-06, + "loss": 0.2098400741815567, + "step": 2984 + }, + { + "epoch": 1.4749783763746447, + "grad_norm": 1.4059961620340085, + "learning_rate": 3.5467753681471784e-06, + "loss": 0.27138370275497437, + "step": 2985 + }, + { + "epoch": 1.475472630668479, + "grad_norm": 1.456553871591733, + "learning_rate": 3.5405359235866468e-06, + "loss": 0.2675255537033081, + "step": 2986 + }, + { + "epoch": 1.475966884962313, + "grad_norm": 1.3852192514849078, + "learning_rate": 3.5343007912720397e-06, + "loss": 0.2927984893321991, + "step": 2987 + }, + { + "epoch": 1.4764611392561473, + "grad_norm": 1.4840757807353469, + "learning_rate": 3.5280699753658354e-06, + "loss": 0.2897256910800934, + "step": 2988 + }, + { + "epoch": 1.4769553935499815, + "grad_norm": 1.3162511876956198, + "learning_rate": 3.521843480027646e-06, + "loss": 0.25903570652008057, + "step": 2989 + }, + { + "epoch": 1.4774496478438157, + "grad_norm": 1.1815962199969574, + "learning_rate": 3.515621309414191e-06, + "loss": 0.2097684144973755, + "step": 2990 + }, + { + "epoch": 1.4779439021376497, + "grad_norm": 1.368257943211956, + "learning_rate": 3.5094034676792952e-06, + "loss": 0.25807827711105347, + "step": 2991 + }, + { + "epoch": 1.478438156431484, + "grad_norm": 1.3326288392160186, + "learning_rate": 3.503189958973906e-06, + "loss": 0.24161803722381592, + "step": 2992 + }, + { + "epoch": 1.4789324107253181, + "grad_norm": 1.3735233821721475, + "learning_rate": 3.4969807874460717e-06, + "loss": 0.2612338364124298, + "step": 2993 + }, + { + "epoch": 1.4794266650191523, + "grad_norm": 1.3484776453875857, + "learning_rate": 3.490775957240947e-06, + "loss": 0.2529192566871643, + "step": 2994 + }, + { + "epoch": 1.4799209193129865, + "grad_norm": 1.376626480795096, + "learning_rate": 3.4845754725007883e-06, + "loss": 0.2616920471191406, + "step": 2995 + }, + { + "epoch": 1.4804151736068207, + "grad_norm": 1.1709509708234012, + "learning_rate": 
3.4783793373649534e-06, + "loss": 0.2372770607471466, + "step": 2996 + }, + { + "epoch": 1.480909427900655, + "grad_norm": 1.6683733615888718, + "learning_rate": 3.4721875559698826e-06, + "loss": 0.2993369996547699, + "step": 2997 + }, + { + "epoch": 1.4814036821944891, + "grad_norm": 1.444631738912031, + "learning_rate": 3.4660001324491354e-06, + "loss": 0.2703147530555725, + "step": 2998 + }, + { + "epoch": 1.4818979364883234, + "grad_norm": 1.497851135078702, + "learning_rate": 3.459817070933337e-06, + "loss": 0.2909662425518036, + "step": 2999 + }, + { + "epoch": 1.4823921907821573, + "grad_norm": 1.4957339087199897, + "learning_rate": 3.4536383755502146e-06, + "loss": 0.2620519697666168, + "step": 3000 + }, + { + "epoch": 1.4828864450759915, + "grad_norm": 1.4607702963487426, + "learning_rate": 3.447464050424576e-06, + "loss": 0.2740327715873718, + "step": 3001 + }, + { + "epoch": 1.4833806993698258, + "grad_norm": 1.4051737005514326, + "learning_rate": 3.441294099678314e-06, + "loss": 0.2597920000553131, + "step": 3002 + }, + { + "epoch": 1.48387495366366, + "grad_norm": 1.2931150222772085, + "learning_rate": 3.435128527430397e-06, + "loss": 0.23138844966888428, + "step": 3003 + }, + { + "epoch": 1.4843692079574942, + "grad_norm": 1.4678522965018421, + "learning_rate": 3.428967337796879e-06, + "loss": 0.26457998156547546, + "step": 3004 + }, + { + "epoch": 1.4848634622513284, + "grad_norm": 1.3435199008351797, + "learning_rate": 3.4228105348908703e-06, + "loss": 0.22283414006233215, + "step": 3005 + }, + { + "epoch": 1.4853577165451624, + "grad_norm": 1.404722725472706, + "learning_rate": 3.416658122822576e-06, + "loss": 0.26169392466545105, + "step": 3006 + }, + { + "epoch": 1.4858519708389966, + "grad_norm": 1.3942121909077798, + "learning_rate": 3.4105101056992574e-06, + "loss": 0.22738765180110931, + "step": 3007 + }, + { + "epoch": 1.4863462251328308, + "grad_norm": 1.640113120385147, + "learning_rate": 3.404366487625237e-06, + "loss": 
0.24252702295780182, + "step": 3008 + }, + { + "epoch": 1.486840479426665, + "grad_norm": 1.2658350422978366, + "learning_rate": 3.398227272701905e-06, + "loss": 0.2192659229040146, + "step": 3009 + }, + { + "epoch": 1.4873347337204992, + "grad_norm": 1.3659525117305242, + "learning_rate": 3.3920924650277253e-06, + "loss": 0.23824100196361542, + "step": 3010 + }, + { + "epoch": 1.4878289880143334, + "grad_norm": 1.304246601014088, + "learning_rate": 3.3859620686981977e-06, + "loss": 0.25558948516845703, + "step": 3011 + }, + { + "epoch": 1.4883232423081676, + "grad_norm": 1.2977660969069507, + "learning_rate": 3.3798360878058887e-06, + "loss": 0.23521414399147034, + "step": 3012 + }, + { + "epoch": 1.4888174966020018, + "grad_norm": 1.5059732923775448, + "learning_rate": 3.373714526440417e-06, + "loss": 0.26024043560028076, + "step": 3013 + }, + { + "epoch": 1.489311750895836, + "grad_norm": 1.3966534942487767, + "learning_rate": 3.3675973886884506e-06, + "loss": 0.2676945626735687, + "step": 3014 + }, + { + "epoch": 1.4898060051896702, + "grad_norm": 1.4302757106543351, + "learning_rate": 3.361484678633701e-06, + "loss": 0.29499778151512146, + "step": 3015 + }, + { + "epoch": 1.4903002594835042, + "grad_norm": 1.2541194356509255, + "learning_rate": 3.35537640035693e-06, + "loss": 0.21667227149009705, + "step": 3016 + }, + { + "epoch": 1.4907945137773384, + "grad_norm": 1.5055716214820787, + "learning_rate": 3.3492725579359288e-06, + "loss": 0.2852727770805359, + "step": 3017 + }, + { + "epoch": 1.4912887680711726, + "grad_norm": 1.3110566349547437, + "learning_rate": 3.343173155445546e-06, + "loss": 0.22535362839698792, + "step": 3018 + }, + { + "epoch": 1.4917830223650068, + "grad_norm": 1.3390943365322368, + "learning_rate": 3.3370781969576473e-06, + "loss": 0.23513402044773102, + "step": 3019 + }, + { + "epoch": 1.492277276658841, + "grad_norm": 1.34171251218287, + "learning_rate": 3.3309876865411426e-06, + "loss": 0.2343328893184662, + "step": 3020 + }, + { + 
"epoch": 1.4927715309526752, + "grad_norm": 1.4982279835949508, + "learning_rate": 3.3249016282619696e-06, + "loss": 0.309964656829834, + "step": 3021 + }, + { + "epoch": 1.4932657852465092, + "grad_norm": 1.4104830526650916, + "learning_rate": 3.318820026183095e-06, + "loss": 0.2678214907646179, + "step": 3022 + }, + { + "epoch": 1.4937600395403434, + "grad_norm": 1.3871314289257326, + "learning_rate": 3.312742884364508e-06, + "loss": 0.24117907881736755, + "step": 3023 + }, + { + "epoch": 1.4942542938341776, + "grad_norm": 1.4966526123322192, + "learning_rate": 3.306670206863225e-06, + "loss": 0.23572009801864624, + "step": 3024 + }, + { + "epoch": 1.4947485481280118, + "grad_norm": 1.1974970903692888, + "learning_rate": 3.3006019977332728e-06, + "loss": 0.20058652758598328, + "step": 3025 + }, + { + "epoch": 1.495242802421846, + "grad_norm": 1.4552709446661256, + "learning_rate": 3.2945382610257017e-06, + "loss": 0.2433123141527176, + "step": 3026 + }, + { + "epoch": 1.4957370567156802, + "grad_norm": 1.330592869585441, + "learning_rate": 3.2884790007885834e-06, + "loss": 0.2648032009601593, + "step": 3027 + }, + { + "epoch": 1.4962313110095145, + "grad_norm": 1.4274009022113794, + "learning_rate": 3.2824242210669853e-06, + "loss": 0.23508986830711365, + "step": 3028 + }, + { + "epoch": 1.4967255653033487, + "grad_norm": 1.337116326245031, + "learning_rate": 3.2763739259029946e-06, + "loss": 0.2340327799320221, + "step": 3029 + }, + { + "epoch": 1.4972198195971829, + "grad_norm": 1.4724312525996526, + "learning_rate": 3.2703281193357028e-06, + "loss": 0.24071671068668365, + "step": 3030 + }, + { + "epoch": 1.4977140738910169, + "grad_norm": 1.4191732736253682, + "learning_rate": 3.264286805401203e-06, + "loss": 0.26332271099090576, + "step": 3031 + }, + { + "epoch": 1.498208328184851, + "grad_norm": 1.266600605298302, + "learning_rate": 3.2582499881325904e-06, + "loss": 0.21818014979362488, + "step": 3032 + }, + { + "epoch": 1.4987025824786853, + "grad_norm": 
1.3340246980776698, + "learning_rate": 3.2522176715599606e-06, + "loss": 0.26997917890548706, + "step": 3033 + }, + { + "epoch": 1.4991968367725195, + "grad_norm": 1.4818331950802985, + "learning_rate": 3.2461898597103935e-06, + "loss": 0.21703608334064484, + "step": 3034 + }, + { + "epoch": 1.4996910910663537, + "grad_norm": 1.287764216628678, + "learning_rate": 3.240166556607979e-06, + "loss": 0.24345526099205017, + "step": 3035 + }, + { + "epoch": 1.5001853453601877, + "grad_norm": 1.2134455175661707, + "learning_rate": 3.2341477662737877e-06, + "loss": 0.2428402602672577, + "step": 3036 + }, + { + "epoch": 1.5006795996540219, + "grad_norm": 1.389226279044202, + "learning_rate": 3.228133492725872e-06, + "loss": 0.234619602560997, + "step": 3037 + }, + { + "epoch": 1.501173853947856, + "grad_norm": 1.3308420188359134, + "learning_rate": 3.2221237399792784e-06, + "loss": 0.27995944023132324, + "step": 3038 + }, + { + "epoch": 1.5016681082416903, + "grad_norm": 1.283844133259085, + "learning_rate": 3.2161185120460327e-06, + "loss": 0.23708665370941162, + "step": 3039 + }, + { + "epoch": 1.5021623625355245, + "grad_norm": 1.3268773172813266, + "learning_rate": 3.2101178129351373e-06, + "loss": 0.2541486620903015, + "step": 3040 + }, + { + "epoch": 1.5026566168293587, + "grad_norm": 1.2735534589560005, + "learning_rate": 3.204121646652576e-06, + "loss": 0.2281494140625, + "step": 3041 + }, + { + "epoch": 1.503150871123193, + "grad_norm": 1.4214183804465141, + "learning_rate": 3.1981300172013006e-06, + "loss": 0.24793995916843414, + "step": 3042 + }, + { + "epoch": 1.503645125417027, + "grad_norm": 1.3820844339773122, + "learning_rate": 3.19214292858124e-06, + "loss": 0.25877612829208374, + "step": 3043 + }, + { + "epoch": 1.5041393797108613, + "grad_norm": 1.2606638362034603, + "learning_rate": 3.1861603847892907e-06, + "loss": 0.23822908103466034, + "step": 3044 + }, + { + "epoch": 1.5046336340046955, + "grad_norm": 1.3375723790086107, + "learning_rate": 
3.1801823898193075e-06, + "loss": 0.2450297623872757, + "step": 3045 + }, + { + "epoch": 1.5051278882985297, + "grad_norm": 1.291286771303469, + "learning_rate": 3.1742089476621176e-06, + "loss": 0.23657044768333435, + "step": 3046 + }, + { + "epoch": 1.505622142592364, + "grad_norm": 1.330327819651038, + "learning_rate": 3.1682400623055043e-06, + "loss": 0.22040539979934692, + "step": 3047 + }, + { + "epoch": 1.506116396886198, + "grad_norm": 1.2295078748580162, + "learning_rate": 3.162275737734213e-06, + "loss": 0.24671347439289093, + "step": 3048 + }, + { + "epoch": 1.5066106511800321, + "grad_norm": 1.3193055288047242, + "learning_rate": 3.156315977929939e-06, + "loss": 0.2590971291065216, + "step": 3049 + }, + { + "epoch": 1.5071049054738663, + "grad_norm": 1.3201796395435559, + "learning_rate": 3.1503607868713383e-06, + "loss": 0.2650923430919647, + "step": 3050 + }, + { + "epoch": 1.5075991597677005, + "grad_norm": 1.3124240495866886, + "learning_rate": 3.1444101685339987e-06, + "loss": 0.22146420180797577, + "step": 3051 + }, + { + "epoch": 1.5080934140615345, + "grad_norm": 1.3875424644692997, + "learning_rate": 3.1384641268904804e-06, + "loss": 0.26743125915527344, + "step": 3052 + }, + { + "epoch": 1.5085876683553687, + "grad_norm": 1.4406215302595167, + "learning_rate": 3.1325226659102746e-06, + "loss": 0.24730908870697021, + "step": 3053 + }, + { + "epoch": 1.509081922649203, + "grad_norm": 1.3933207280707873, + "learning_rate": 3.1265857895598094e-06, + "loss": 0.26301079988479614, + "step": 3054 + }, + { + "epoch": 1.5095761769430371, + "grad_norm": 1.2589035946994764, + "learning_rate": 3.1206535018024598e-06, + "loss": 0.22815877199172974, + "step": 3055 + }, + { + "epoch": 1.5100704312368713, + "grad_norm": 1.533757049437193, + "learning_rate": 3.114725806598544e-06, + "loss": 0.25178754329681396, + "step": 3056 + }, + { + "epoch": 1.5105646855307056, + "grad_norm": 1.3661154596053653, + "learning_rate": 3.1088027079052973e-06, + "loss": 
0.20269548892974854, + "step": 3057 + }, + { + "epoch": 1.5110589398245398, + "grad_norm": 1.4014331356202114, + "learning_rate": 3.1028842096769006e-06, + "loss": 0.25972461700439453, + "step": 3058 + }, + { + "epoch": 1.511553194118374, + "grad_norm": 1.3745096869790834, + "learning_rate": 3.0969703158644583e-06, + "loss": 0.23313641548156738, + "step": 3059 + }, + { + "epoch": 1.5120474484122082, + "grad_norm": 1.2941298023610517, + "learning_rate": 3.0910610304159993e-06, + "loss": 0.2359476238489151, + "step": 3060 + }, + { + "epoch": 1.5125417027060424, + "grad_norm": 1.3631605592123968, + "learning_rate": 3.085156357276481e-06, + "loss": 0.263039767742157, + "step": 3061 + }, + { + "epoch": 1.5130359569998766, + "grad_norm": 1.4414947958352682, + "learning_rate": 3.0792563003877795e-06, + "loss": 0.2222701609134674, + "step": 3062 + }, + { + "epoch": 1.5135302112937106, + "grad_norm": 1.5152386602086467, + "learning_rate": 3.0733608636886815e-06, + "loss": 0.2511240839958191, + "step": 3063 + }, + { + "epoch": 1.5140244655875448, + "grad_norm": 1.3426863589238012, + "learning_rate": 3.0674700511149057e-06, + "loss": 0.26376873254776, + "step": 3064 + }, + { + "epoch": 1.514518719881379, + "grad_norm": 1.50705834278763, + "learning_rate": 3.0615838665990685e-06, + "loss": 0.2883176803588867, + "step": 3065 + }, + { + "epoch": 1.5150129741752132, + "grad_norm": 1.4534493774446482, + "learning_rate": 3.055702314070703e-06, + "loss": 0.2641439437866211, + "step": 3066 + }, + { + "epoch": 1.5155072284690472, + "grad_norm": 1.2206107550113217, + "learning_rate": 3.049825397456252e-06, + "loss": 0.22250229120254517, + "step": 3067 + }, + { + "epoch": 1.5160014827628814, + "grad_norm": 1.6917159383624243, + "learning_rate": 3.0439531206790585e-06, + "loss": 0.291684091091156, + "step": 3068 + }, + { + "epoch": 1.5164957370567156, + "grad_norm": 1.2582948861406589, + "learning_rate": 3.0380854876593725e-06, + "loss": 0.22581104934215546, + "step": 3069 + }, + { + 
"epoch": 1.5169899913505498, + "grad_norm": 1.3218689478609282, + "learning_rate": 3.032222502314345e-06, + "loss": 0.22701920568943024, + "step": 3070 + }, + { + "epoch": 1.517484245644384, + "grad_norm": 1.4011754473371674, + "learning_rate": 3.0263641685580134e-06, + "loss": 0.27151840925216675, + "step": 3071 + }, + { + "epoch": 1.5179784999382182, + "grad_norm": 1.4319870241234463, + "learning_rate": 3.0205104903013183e-06, + "loss": 0.25780510902404785, + "step": 3072 + }, + { + "epoch": 1.5184727542320524, + "grad_norm": 1.232949136662072, + "learning_rate": 3.014661471452103e-06, + "loss": 0.23905009031295776, + "step": 3073 + }, + { + "epoch": 1.5189670085258866, + "grad_norm": 1.296685135563547, + "learning_rate": 3.0088171159150758e-06, + "loss": 0.25984710454940796, + "step": 3074 + }, + { + "epoch": 1.5194612628197208, + "grad_norm": 1.5925440917505933, + "learning_rate": 3.0029774275918523e-06, + "loss": 0.24934321641921997, + "step": 3075 + }, + { + "epoch": 1.519955517113555, + "grad_norm": 1.3570253725800296, + "learning_rate": 2.997142410380921e-06, + "loss": 0.24181538820266724, + "step": 3076 + }, + { + "epoch": 1.5204497714073892, + "grad_norm": 1.4224922399256614, + "learning_rate": 2.9913120681776586e-06, + "loss": 0.28867265582084656, + "step": 3077 + }, + { + "epoch": 1.5209440257012234, + "grad_norm": 1.3689537883355085, + "learning_rate": 2.9854864048743183e-06, + "loss": 0.25082239508628845, + "step": 3078 + }, + { + "epoch": 1.5214382799950574, + "grad_norm": 1.1809552467181543, + "learning_rate": 2.979665424360031e-06, + "loss": 0.21152186393737793, + "step": 3079 + }, + { + "epoch": 1.5219325342888916, + "grad_norm": 1.3255328033562375, + "learning_rate": 2.9738491305207926e-06, + "loss": 0.22989922761917114, + "step": 3080 + }, + { + "epoch": 1.5224267885827258, + "grad_norm": 1.4352789035320561, + "learning_rate": 2.9680375272394855e-06, + "loss": 0.21606113016605377, + "step": 3081 + }, + { + "epoch": 1.5229210428765598, + 
"grad_norm": 1.2795767684328416, + "learning_rate": 2.962230618395855e-06, + "loss": 0.25060969591140747, + "step": 3082 + }, + { + "epoch": 1.523415297170394, + "grad_norm": 1.4409246111783223, + "learning_rate": 2.9564284078665016e-06, + "loss": 0.2574993371963501, + "step": 3083 + }, + { + "epoch": 1.5239095514642282, + "grad_norm": 1.3476850353049301, + "learning_rate": 2.9506308995249035e-06, + "loss": 0.2552590072154999, + "step": 3084 + }, + { + "epoch": 1.5244038057580624, + "grad_norm": 1.4294064187721107, + "learning_rate": 2.9448380972413936e-06, + "loss": 0.2356393188238144, + "step": 3085 + }, + { + "epoch": 1.5248980600518967, + "grad_norm": 1.2956637091449177, + "learning_rate": 2.939050004883164e-06, + "loss": 0.25111299753189087, + "step": 3086 + }, + { + "epoch": 1.5253923143457309, + "grad_norm": 1.6187968050107684, + "learning_rate": 2.933266626314263e-06, + "loss": 0.2713226079940796, + "step": 3087 + }, + { + "epoch": 1.525886568639565, + "grad_norm": 1.371480760416421, + "learning_rate": 2.92748796539559e-06, + "loss": 0.2493591606616974, + "step": 3088 + }, + { + "epoch": 1.5263808229333993, + "grad_norm": 1.3919253891743593, + "learning_rate": 2.9217140259848984e-06, + "loss": 0.2377934455871582, + "step": 3089 + }, + { + "epoch": 1.5268750772272335, + "grad_norm": 1.222188939870737, + "learning_rate": 2.9159448119367896e-06, + "loss": 0.23113523423671722, + "step": 3090 + }, + { + "epoch": 1.5273693315210677, + "grad_norm": 1.3071786210451368, + "learning_rate": 2.910180327102702e-06, + "loss": 0.2212657630443573, + "step": 3091 + }, + { + "epoch": 1.527863585814902, + "grad_norm": 1.4809706556535216, + "learning_rate": 2.904420575330923e-06, + "loss": 0.3317147195339203, + "step": 3092 + }, + { + "epoch": 1.528357840108736, + "grad_norm": 1.222501836116789, + "learning_rate": 2.8986655604665914e-06, + "loss": 0.21677865087985992, + "step": 3093 + }, + { + "epoch": 1.52885209440257, + "grad_norm": 1.4687657258901345, + "learning_rate": 
2.892915286351663e-06, + "loss": 0.2719038724899292, + "step": 3094 + }, + { + "epoch": 1.5293463486964043, + "grad_norm": 1.4800981330468082, + "learning_rate": 2.887169756824941e-06, + "loss": 0.2870655953884125, + "step": 3095 + }, + { + "epoch": 1.5298406029902385, + "grad_norm": 1.6050530390151894, + "learning_rate": 2.8814289757220636e-06, + "loss": 0.27370864152908325, + "step": 3096 + }, + { + "epoch": 1.5303348572840727, + "grad_norm": 1.2925821727625635, + "learning_rate": 2.8756929468754834e-06, + "loss": 0.24579623341560364, + "step": 3097 + }, + { + "epoch": 1.5308291115779067, + "grad_norm": 1.5466324939604184, + "learning_rate": 2.869961674114501e-06, + "loss": 0.25092196464538574, + "step": 3098 + }, + { + "epoch": 1.531323365871741, + "grad_norm": 1.539826368870157, + "learning_rate": 2.864235161265232e-06, + "loss": 0.29637211561203003, + "step": 3099 + }, + { + "epoch": 1.531817620165575, + "grad_norm": 1.346232107313421, + "learning_rate": 2.8585134121506086e-06, + "loss": 0.24216854572296143, + "step": 3100 + }, + { + "epoch": 1.5323118744594093, + "grad_norm": 1.264644352464564, + "learning_rate": 2.8527964305903887e-06, + "loss": 0.2050018608570099, + "step": 3101 + }, + { + "epoch": 1.5328061287532435, + "grad_norm": 1.4429594327267479, + "learning_rate": 2.8470842204011562e-06, + "loss": 0.2323600798845291, + "step": 3102 + }, + { + "epoch": 1.5333003830470777, + "grad_norm": 1.3588986581117766, + "learning_rate": 2.8413767853962937e-06, + "loss": 0.2582741379737854, + "step": 3103 + }, + { + "epoch": 1.533794637340912, + "grad_norm": 1.2503142010331656, + "learning_rate": 2.8356741293860034e-06, + "loss": 0.2190069705247879, + "step": 3104 + }, + { + "epoch": 1.5342888916347461, + "grad_norm": 1.2700906528895424, + "learning_rate": 2.8299762561773004e-06, + "loss": 0.2293972671031952, + "step": 3105 + }, + { + "epoch": 1.5347831459285803, + "grad_norm": 1.4604730845156306, + "learning_rate": 2.8242831695740004e-06, + "loss": 
0.28793102502822876, + "step": 3106 + }, + { + "epoch": 1.5352774002224145, + "grad_norm": 1.3871033704581968, + "learning_rate": 2.8185948733767276e-06, + "loss": 0.25700464844703674, + "step": 3107 + }, + { + "epoch": 1.5357716545162488, + "grad_norm": 1.6036334059609652, + "learning_rate": 2.8129113713829115e-06, + "loss": 0.2633448541164398, + "step": 3108 + }, + { + "epoch": 1.5362659088100827, + "grad_norm": 1.2623866770143863, + "learning_rate": 2.8072326673867667e-06, + "loss": 0.2363145351409912, + "step": 3109 + }, + { + "epoch": 1.536760163103917, + "grad_norm": 1.3073287831639788, + "learning_rate": 2.8015587651793273e-06, + "loss": 0.24324053525924683, + "step": 3110 + }, + { + "epoch": 1.5372544173977511, + "grad_norm": 1.445888976457047, + "learning_rate": 2.795889668548399e-06, + "loss": 0.24139198660850525, + "step": 3111 + }, + { + "epoch": 1.5377486716915854, + "grad_norm": 1.3070463104686283, + "learning_rate": 2.790225381278595e-06, + "loss": 0.2502334713935852, + "step": 3112 + }, + { + "epoch": 1.5382429259854193, + "grad_norm": 1.3233606598015195, + "learning_rate": 2.784565907151311e-06, + "loss": 0.24635109305381775, + "step": 3113 + }, + { + "epoch": 1.5387371802792535, + "grad_norm": 1.236974627125298, + "learning_rate": 2.7789112499447312e-06, + "loss": 0.2299586534500122, + "step": 3114 + }, + { + "epoch": 1.5392314345730878, + "grad_norm": 1.232633224868461, + "learning_rate": 2.7732614134338243e-06, + "loss": 0.2296627312898636, + "step": 3115 + }, + { + "epoch": 1.539725688866922, + "grad_norm": 1.3919487561893158, + "learning_rate": 2.767616401390343e-06, + "loss": 0.26127320528030396, + "step": 3116 + }, + { + "epoch": 1.5402199431607562, + "grad_norm": 1.3612758454379796, + "learning_rate": 2.761976217582808e-06, + "loss": 0.24718445539474487, + "step": 3117 + }, + { + "epoch": 1.5407141974545904, + "grad_norm": 1.3000063965271036, + "learning_rate": 2.7563408657765345e-06, + "loss": 0.22314362227916718, + "step": 3118 + }, + { + 
"epoch": 1.5412084517484246, + "grad_norm": 1.2190954536725822, + "learning_rate": 2.750710349733602e-06, + "loss": 0.2288416028022766, + "step": 3119 + }, + { + "epoch": 1.5417027060422588, + "grad_norm": 1.3774388084670495, + "learning_rate": 2.7450846732128577e-06, + "loss": 0.26181158423423767, + "step": 3120 + }, + { + "epoch": 1.542196960336093, + "grad_norm": 1.2123920647911897, + "learning_rate": 2.739463839969926e-06, + "loss": 0.22397834062576294, + "step": 3121 + }, + { + "epoch": 1.5426912146299272, + "grad_norm": 1.4361842348504215, + "learning_rate": 2.7338478537571943e-06, + "loss": 0.23633858561515808, + "step": 3122 + }, + { + "epoch": 1.5431854689237614, + "grad_norm": 1.402092217147563, + "learning_rate": 2.7282367183238143e-06, + "loss": 0.26719149947166443, + "step": 3123 + }, + { + "epoch": 1.5436797232175956, + "grad_norm": 1.5260713360749147, + "learning_rate": 2.722630437415701e-06, + "loss": 0.2882165014743805, + "step": 3124 + }, + { + "epoch": 1.5441739775114296, + "grad_norm": 1.258294682394544, + "learning_rate": 2.7170290147755285e-06, + "loss": 0.2377905696630478, + "step": 3125 + }, + { + "epoch": 1.5446682318052638, + "grad_norm": 1.3195147017546947, + "learning_rate": 2.7114324541427193e-06, + "loss": 0.2705368399620056, + "step": 3126 + }, + { + "epoch": 1.545162486099098, + "grad_norm": 1.2857701503132921, + "learning_rate": 2.7058407592534663e-06, + "loss": 0.246593177318573, + "step": 3127 + }, + { + "epoch": 1.5456567403929322, + "grad_norm": 1.33265619524068, + "learning_rate": 2.700253933840705e-06, + "loss": 0.2339816391468048, + "step": 3128 + }, + { + "epoch": 1.5461509946867662, + "grad_norm": 1.3254997645322988, + "learning_rate": 2.6946719816341127e-06, + "loss": 0.2727898359298706, + "step": 3129 + }, + { + "epoch": 1.5466452489806004, + "grad_norm": 1.483440007746236, + "learning_rate": 2.6890949063601255e-06, + "loss": 0.285343736410141, + "step": 3130 + }, + { + "epoch": 1.5471395032744346, + "grad_norm": 
1.4219498161281177, + "learning_rate": 2.6835227117419184e-06, + "loss": 0.25782397389411926, + "step": 3131 + }, + { + "epoch": 1.5476337575682688, + "grad_norm": 1.4096561970820742, + "learning_rate": 2.67795540149941e-06, + "loss": 0.26677054166793823, + "step": 3132 + }, + { + "epoch": 1.548128011862103, + "grad_norm": 1.375758748898483, + "learning_rate": 2.6723929793492555e-06, + "loss": 0.2696993052959442, + "step": 3133 + }, + { + "epoch": 1.5486222661559372, + "grad_norm": 1.3214248540646165, + "learning_rate": 2.66683544900485e-06, + "loss": 0.2536013424396515, + "step": 3134 + }, + { + "epoch": 1.5491165204497714, + "grad_norm": 1.352660590997614, + "learning_rate": 2.661282814176319e-06, + "loss": 0.2583885192871094, + "step": 3135 + }, + { + "epoch": 1.5496107747436056, + "grad_norm": 1.3555750519784333, + "learning_rate": 2.655735078570528e-06, + "loss": 0.24341340363025665, + "step": 3136 + }, + { + "epoch": 1.5501050290374399, + "grad_norm": 1.3694743065317843, + "learning_rate": 2.650192245891059e-06, + "loss": 0.2575637698173523, + "step": 3137 + }, + { + "epoch": 1.550599283331274, + "grad_norm": 1.3743479794773286, + "learning_rate": 2.644654319838227e-06, + "loss": 0.24109753966331482, + "step": 3138 + }, + { + "epoch": 1.5510935376251083, + "grad_norm": 1.2822421062589742, + "learning_rate": 2.6391213041090822e-06, + "loss": 0.246525377035141, + "step": 3139 + }, + { + "epoch": 1.5515877919189422, + "grad_norm": 1.3144657839500415, + "learning_rate": 2.6335932023973777e-06, + "loss": 0.2589566111564636, + "step": 3140 + }, + { + "epoch": 1.5520820462127765, + "grad_norm": 1.333811387247849, + "learning_rate": 2.628070018393598e-06, + "loss": 0.26198744773864746, + "step": 3141 + }, + { + "epoch": 1.5525763005066107, + "grad_norm": 1.2808916237604833, + "learning_rate": 2.622551755784942e-06, + "loss": 0.22991782426834106, + "step": 3142 + }, + { + "epoch": 1.5530705548004449, + "grad_norm": 1.242582313641482, + "learning_rate": 
2.6170384182553244e-06, + "loss": 0.22211629152297974, + "step": 3143 + }, + { + "epoch": 1.5535648090942789, + "grad_norm": 1.306994517774283, + "learning_rate": 2.6115300094853666e-06, + "loss": 0.2665289640426636, + "step": 3144 + }, + { + "epoch": 1.554059063388113, + "grad_norm": 1.260713008188702, + "learning_rate": 2.6060265331524114e-06, + "loss": 0.20211085677146912, + "step": 3145 + }, + { + "epoch": 1.5545533176819473, + "grad_norm": 1.3930467289400041, + "learning_rate": 2.6005279929304918e-06, + "loss": 0.24264919757843018, + "step": 3146 + }, + { + "epoch": 1.5550475719757815, + "grad_norm": 1.316241217623005, + "learning_rate": 2.595034392490354e-06, + "loss": 0.2722601294517517, + "step": 3147 + }, + { + "epoch": 1.5555418262696157, + "grad_norm": 1.3463437829147908, + "learning_rate": 2.58954573549946e-06, + "loss": 0.26061201095581055, + "step": 3148 + }, + { + "epoch": 1.5560360805634499, + "grad_norm": 1.3701131034296847, + "learning_rate": 2.5840620256219464e-06, + "loss": 0.20620305836200714, + "step": 3149 + }, + { + "epoch": 1.556530334857284, + "grad_norm": 1.3323948648350379, + "learning_rate": 2.578583266518664e-06, + "loss": 0.2424723207950592, + "step": 3150 + }, + { + "epoch": 1.5570245891511183, + "grad_norm": 1.4286998078779003, + "learning_rate": 2.573109461847153e-06, + "loss": 0.248019739985466, + "step": 3151 + }, + { + "epoch": 1.5575188434449525, + "grad_norm": 1.2753051030343154, + "learning_rate": 2.5676406152616483e-06, + "loss": 0.23162522912025452, + "step": 3152 + }, + { + "epoch": 1.5580130977387867, + "grad_norm": 1.6072180292151754, + "learning_rate": 2.562176730413074e-06, + "loss": 0.20099176466464996, + "step": 3153 + }, + { + "epoch": 1.558507352032621, + "grad_norm": 1.4868098360756863, + "learning_rate": 2.5567178109490433e-06, + "loss": 0.27957430481910706, + "step": 3154 + }, + { + "epoch": 1.5590016063264551, + "grad_norm": 1.248830156095604, + "learning_rate": 2.551263860513845e-06, + "loss": 
0.23941464722156525, + "step": 3155 + }, + { + "epoch": 1.559495860620289, + "grad_norm": 1.4371594834198067, + "learning_rate": 2.5458148827484695e-06, + "loss": 0.24910275638103485, + "step": 3156 + }, + { + "epoch": 1.5599901149141233, + "grad_norm": 1.325153365111165, + "learning_rate": 2.540370881290568e-06, + "loss": 0.26430344581604004, + "step": 3157 + }, + { + "epoch": 1.5604843692079575, + "grad_norm": 1.419775898075986, + "learning_rate": 2.534931859774481e-06, + "loss": 0.2833614945411682, + "step": 3158 + }, + { + "epoch": 1.5609786235017915, + "grad_norm": 1.2863995969426358, + "learning_rate": 2.5294978218312215e-06, + "loss": 0.24630708992481232, + "step": 3159 + }, + { + "epoch": 1.5614728777956257, + "grad_norm": 1.398973984381973, + "learning_rate": 2.524068771088476e-06, + "loss": 0.2674857974052429, + "step": 3160 + }, + { + "epoch": 1.56196713208946, + "grad_norm": 1.34356245737179, + "learning_rate": 2.5186447111706005e-06, + "loss": 0.23531441390514374, + "step": 3161 + }, + { + "epoch": 1.5624613863832941, + "grad_norm": 1.2374731185400574, + "learning_rate": 2.5132256456986236e-06, + "loss": 0.2603223919868469, + "step": 3162 + }, + { + "epoch": 1.5629556406771283, + "grad_norm": 1.302457785178724, + "learning_rate": 2.5078115782902267e-06, + "loss": 0.220007985830307, + "step": 3163 + }, + { + "epoch": 1.5634498949709625, + "grad_norm": 1.36046018530454, + "learning_rate": 2.502402512559773e-06, + "loss": 0.22660651803016663, + "step": 3164 + }, + { + "epoch": 1.5639441492647967, + "grad_norm": 1.4627286861974862, + "learning_rate": 2.4969984521182766e-06, + "loss": 0.26425695419311523, + "step": 3165 + }, + { + "epoch": 1.564438403558631, + "grad_norm": 1.3019070428865334, + "learning_rate": 2.4915994005734057e-06, + "loss": 0.22870787978172302, + "step": 3166 + }, + { + "epoch": 1.5649326578524652, + "grad_norm": 1.2622414815912377, + "learning_rate": 2.48620536152949e-06, + "loss": 0.25734084844589233, + "step": 3167 + }, + { + 
"epoch": 1.5654269121462994, + "grad_norm": 1.2954820564672134, + "learning_rate": 2.4808163385875226e-06, + "loss": 0.24831843376159668, + "step": 3168 + }, + { + "epoch": 1.5659211664401336, + "grad_norm": 1.3356720372460569, + "learning_rate": 2.4754323353451284e-06, + "loss": 0.2389685958623886, + "step": 3169 + }, + { + "epoch": 1.5664154207339678, + "grad_norm": 1.33182477221405, + "learning_rate": 2.4700533553965946e-06, + "loss": 0.24750663340091705, + "step": 3170 + }, + { + "epoch": 1.5669096750278018, + "grad_norm": 1.4158946259185428, + "learning_rate": 2.4646794023328525e-06, + "loss": 0.2689003348350525, + "step": 3171 + }, + { + "epoch": 1.567403929321636, + "grad_norm": 1.32371836304635, + "learning_rate": 2.45931047974147e-06, + "loss": 0.2574145197868347, + "step": 3172 + }, + { + "epoch": 1.5678981836154702, + "grad_norm": 1.5403400973166155, + "learning_rate": 2.4539465912066706e-06, + "loss": 0.2586211562156677, + "step": 3173 + }, + { + "epoch": 1.5683924379093044, + "grad_norm": 1.340393455505496, + "learning_rate": 2.4485877403093095e-06, + "loss": 0.26383671164512634, + "step": 3174 + }, + { + "epoch": 1.5688866922031384, + "grad_norm": 1.2806590186816509, + "learning_rate": 2.4432339306268736e-06, + "loss": 0.28196123242378235, + "step": 3175 + }, + { + "epoch": 1.5693809464969726, + "grad_norm": 1.4692337066995136, + "learning_rate": 2.4378851657334923e-06, + "loss": 0.2736835181713104, + "step": 3176 + }, + { + "epoch": 1.5698752007908068, + "grad_norm": 1.3442483287569258, + "learning_rate": 2.4325414491999255e-06, + "loss": 0.2316201627254486, + "step": 3177 + }, + { + "epoch": 1.570369455084641, + "grad_norm": 1.363437265904272, + "learning_rate": 2.427202784593562e-06, + "loss": 0.23955810070037842, + "step": 3178 + }, + { + "epoch": 1.5708637093784752, + "grad_norm": 1.4240865879172782, + "learning_rate": 2.4218691754784162e-06, + "loss": 0.263042151927948, + "step": 3179 + }, + { + "epoch": 1.5713579636723094, + "grad_norm": 
1.3283544396978941, + "learning_rate": 2.4165406254151312e-06, + "loss": 0.25570976734161377, + "step": 3180 + }, + { + "epoch": 1.5718522179661436, + "grad_norm": 1.3508561425487733, + "learning_rate": 2.4112171379609696e-06, + "loss": 0.2503488063812256, + "step": 3181 + }, + { + "epoch": 1.5723464722599778, + "grad_norm": 1.2731349274514334, + "learning_rate": 2.40589871666982e-06, + "loss": 0.21815824508666992, + "step": 3182 + }, + { + "epoch": 1.572840726553812, + "grad_norm": 1.4354076907799536, + "learning_rate": 2.400585365092177e-06, + "loss": 0.23936739563941956, + "step": 3183 + }, + { + "epoch": 1.5733349808476462, + "grad_norm": 1.2459112031686363, + "learning_rate": 2.3952770867751595e-06, + "loss": 0.2618086636066437, + "step": 3184 + }, + { + "epoch": 1.5738292351414804, + "grad_norm": 1.477109441631464, + "learning_rate": 2.3899738852625065e-06, + "loss": 0.2852020263671875, + "step": 3185 + }, + { + "epoch": 1.5743234894353144, + "grad_norm": 1.4364121007652697, + "learning_rate": 2.3846757640945505e-06, + "loss": 0.28860047459602356, + "step": 3186 + }, + { + "epoch": 1.5748177437291486, + "grad_norm": 1.2738328733534112, + "learning_rate": 2.3793827268082446e-06, + "loss": 0.2397383451461792, + "step": 3187 + }, + { + "epoch": 1.5753119980229828, + "grad_norm": 1.3548543446694599, + "learning_rate": 2.374094776937145e-06, + "loss": 0.25204962491989136, + "step": 3188 + }, + { + "epoch": 1.575806252316817, + "grad_norm": 1.2908932541507008, + "learning_rate": 2.368811918011411e-06, + "loss": 0.21216189861297607, + "step": 3189 + }, + { + "epoch": 1.576300506610651, + "grad_norm": 1.4719289728075926, + "learning_rate": 2.363534153557805e-06, + "loss": 0.2647620737552643, + "step": 3190 + }, + { + "epoch": 1.5767947609044852, + "grad_norm": 1.4154428976481128, + "learning_rate": 2.358261487099688e-06, + "loss": 0.3079666793346405, + "step": 3191 + }, + { + "epoch": 1.5772890151983194, + "grad_norm": 1.1847060614906242, + "learning_rate": 
2.352993922157013e-06, + "loss": 0.22961711883544922, + "step": 3192 + }, + { + "epoch": 1.5777832694921536, + "grad_norm": 1.5460794294977342, + "learning_rate": 2.347731462246331e-06, + "loss": 0.2657305598258972, + "step": 3193 + }, + { + "epoch": 1.5782775237859878, + "grad_norm": 1.2622234684788671, + "learning_rate": 2.3424741108807914e-06, + "loss": 0.224237859249115, + "step": 3194 + }, + { + "epoch": 1.578771778079822, + "grad_norm": 1.4036688905605132, + "learning_rate": 2.337221871570121e-06, + "loss": 0.26459985971450806, + "step": 3195 + }, + { + "epoch": 1.5792660323736563, + "grad_norm": 1.4237290486306964, + "learning_rate": 2.331974747820641e-06, + "loss": 0.25391027331352234, + "step": 3196 + }, + { + "epoch": 1.5797602866674905, + "grad_norm": 1.3683418214908574, + "learning_rate": 2.326732743135256e-06, + "loss": 0.25822141766548157, + "step": 3197 + }, + { + "epoch": 1.5802545409613247, + "grad_norm": 1.3569651988075904, + "learning_rate": 2.3214958610134554e-06, + "loss": 0.25140073895454407, + "step": 3198 + }, + { + "epoch": 1.5807487952551589, + "grad_norm": 1.280802230226295, + "learning_rate": 2.3162641049513035e-06, + "loss": 0.2550397515296936, + "step": 3199 + }, + { + "epoch": 1.581243049548993, + "grad_norm": 1.3770416210337255, + "learning_rate": 2.3110374784414526e-06, + "loss": 0.2648996412754059, + "step": 3200 + }, + { + "epoch": 1.5817373038428273, + "grad_norm": 1.285627272529884, + "learning_rate": 2.3058159849731134e-06, + "loss": 0.235626682639122, + "step": 3201 + }, + { + "epoch": 1.5822315581366613, + "grad_norm": 1.354562155318599, + "learning_rate": 2.3005996280320873e-06, + "loss": 0.24930328130722046, + "step": 3202 + }, + { + "epoch": 1.5827258124304955, + "grad_norm": 1.299026803187305, + "learning_rate": 2.2953884111007428e-06, + "loss": 0.23712117969989777, + "step": 3203 + }, + { + "epoch": 1.5832200667243297, + "grad_norm": 1.4407443338733177, + "learning_rate": 2.290182337658007e-06, + "loss": 
0.2504096031188965, + "step": 3204 + }, + { + "epoch": 1.583714321018164, + "grad_norm": 1.345261370550347, + "learning_rate": 2.2849814111793823e-06, + "loss": 0.2218465358018875, + "step": 3205 + }, + { + "epoch": 1.5842085753119979, + "grad_norm": 1.3818182639369938, + "learning_rate": 2.279785635136933e-06, + "loss": 0.2653011977672577, + "step": 3206 + }, + { + "epoch": 1.584702829605832, + "grad_norm": 1.521658991035551, + "learning_rate": 2.2745950129992853e-06, + "loss": 0.27551597356796265, + "step": 3207 + }, + { + "epoch": 1.5851970838996663, + "grad_norm": 1.2816405701256748, + "learning_rate": 2.2694095482316247e-06, + "loss": 0.21494519710540771, + "step": 3208 + }, + { + "epoch": 1.5856913381935005, + "grad_norm": 1.2804333364342155, + "learning_rate": 2.2642292442956925e-06, + "loss": 0.2517405152320862, + "step": 3209 + }, + { + "epoch": 1.5861855924873347, + "grad_norm": 1.365131298274178, + "learning_rate": 2.259054104649786e-06, + "loss": 0.25777050852775574, + "step": 3210 + }, + { + "epoch": 1.586679846781169, + "grad_norm": 1.3722239172040558, + "learning_rate": 2.2538841327487582e-06, + "loss": 0.25914469361305237, + "step": 3211 + }, + { + "epoch": 1.5871741010750031, + "grad_norm": 1.3924091851436682, + "learning_rate": 2.2487193320440017e-06, + "loss": 0.23877818882465363, + "step": 3212 + }, + { + "epoch": 1.5876683553688373, + "grad_norm": 1.2757007530985867, + "learning_rate": 2.2435597059834635e-06, + "loss": 0.2226967066526413, + "step": 3213 + }, + { + "epoch": 1.5881626096626715, + "grad_norm": 1.400079876174728, + "learning_rate": 2.2384052580116465e-06, + "loss": 0.28768399357795715, + "step": 3214 + }, + { + "epoch": 1.5886568639565057, + "grad_norm": 1.3700126786923876, + "learning_rate": 2.233255991569575e-06, + "loss": 0.2563883662223816, + "step": 3215 + }, + { + "epoch": 1.58915111825034, + "grad_norm": 1.3688176323163237, + "learning_rate": 2.2281119100948322e-06, + "loss": 0.2595394551753998, + "step": 3216 + }, + { + 
"epoch": 1.589645372544174, + "grad_norm": 1.2924408591101029, + "learning_rate": 2.2229730170215324e-06, + "loss": 0.2354460060596466, + "step": 3217 + }, + { + "epoch": 1.5901396268380081, + "grad_norm": 1.3015321221613778, + "learning_rate": 2.2178393157803225e-06, + "loss": 0.2397463619709015, + "step": 3218 + }, + { + "epoch": 1.5906338811318423, + "grad_norm": 1.5213512082778142, + "learning_rate": 2.212710809798393e-06, + "loss": 0.3304588794708252, + "step": 3219 + }, + { + "epoch": 1.5911281354256765, + "grad_norm": 1.2850321771259765, + "learning_rate": 2.207587502499464e-06, + "loss": 0.23891952633857727, + "step": 3220 + }, + { + "epoch": 1.5916223897195105, + "grad_norm": 1.3962733600240735, + "learning_rate": 2.2024693973037747e-06, + "loss": 0.2544774115085602, + "step": 3221 + }, + { + "epoch": 1.5921166440133447, + "grad_norm": 1.4193118785950918, + "learning_rate": 2.1973564976281003e-06, + "loss": 0.2620859444141388, + "step": 3222 + }, + { + "epoch": 1.592610898307179, + "grad_norm": 1.2794541555838774, + "learning_rate": 2.192248806885747e-06, + "loss": 0.22541281580924988, + "step": 3223 + }, + { + "epoch": 1.5931051526010132, + "grad_norm": 1.2886654382919192, + "learning_rate": 2.187146328486529e-06, + "loss": 0.23454351723194122, + "step": 3224 + }, + { + "epoch": 1.5935994068948474, + "grad_norm": 1.3236984572567387, + "learning_rate": 2.18204906583679e-06, + "loss": 0.24848732352256775, + "step": 3225 + }, + { + "epoch": 1.5940936611886816, + "grad_norm": 1.2049251354008288, + "learning_rate": 2.176957022339389e-06, + "loss": 0.21949590742588043, + "step": 3226 + }, + { + "epoch": 1.5945879154825158, + "grad_norm": 1.3436660246382202, + "learning_rate": 2.171870201393703e-06, + "loss": 0.27300944924354553, + "step": 3227 + }, + { + "epoch": 1.59508216977635, + "grad_norm": 1.3272049247129862, + "learning_rate": 2.16678860639562e-06, + "loss": 0.23850613832473755, + "step": 3228 + }, + { + "epoch": 1.5955764240701842, + "grad_norm": 
1.4611172116234663, + "learning_rate": 2.1617122407375424e-06, + "loss": 0.2728792428970337, + "step": 3229 + }, + { + "epoch": 1.5960706783640184, + "grad_norm": 1.4623582491499052, + "learning_rate": 2.1566411078083726e-06, + "loss": 0.2321755588054657, + "step": 3230 + }, + { + "epoch": 1.5965649326578526, + "grad_norm": 1.5111460470858884, + "learning_rate": 2.1515752109935374e-06, + "loss": 0.30118101835250854, + "step": 3231 + }, + { + "epoch": 1.5970591869516868, + "grad_norm": 1.2041348970592753, + "learning_rate": 2.1465145536749475e-06, + "loss": 0.22317390143871307, + "step": 3232 + }, + { + "epoch": 1.5975534412455208, + "grad_norm": 1.4530812438401597, + "learning_rate": 2.141459139231029e-06, + "loss": 0.2906285524368286, + "step": 3233 + }, + { + "epoch": 1.598047695539355, + "grad_norm": 1.3996891865587815, + "learning_rate": 2.136408971036704e-06, + "loss": 0.24645069241523743, + "step": 3234 + }, + { + "epoch": 1.5985419498331892, + "grad_norm": 1.4725365119055005, + "learning_rate": 2.1313640524633927e-06, + "loss": 0.26764121651649475, + "step": 3235 + }, + { + "epoch": 1.5990362041270234, + "grad_norm": 1.4093554565168636, + "learning_rate": 2.126324386879012e-06, + "loss": 0.2811397910118103, + "step": 3236 + }, + { + "epoch": 1.5995304584208574, + "grad_norm": 1.5104838755570678, + "learning_rate": 2.121289977647971e-06, + "loss": 0.254316508769989, + "step": 3237 + }, + { + "epoch": 1.6000247127146916, + "grad_norm": 1.378432417546232, + "learning_rate": 2.1162608281311636e-06, + "loss": 0.2479352205991745, + "step": 3238 + }, + { + "epoch": 1.6005189670085258, + "grad_norm": 1.200711868039053, + "learning_rate": 2.1112369416859847e-06, + "loss": 0.22767537832260132, + "step": 3239 + }, + { + "epoch": 1.60101322130236, + "grad_norm": 1.3194506024522585, + "learning_rate": 2.106218321666309e-06, + "loss": 0.24286411702632904, + "step": 3240 + }, + { + "epoch": 1.6015074755961942, + "grad_norm": 1.4143801874217299, + "learning_rate": 
2.1012049714224914e-06, + "loss": 0.22960595786571503, + "step": 3241 + }, + { + "epoch": 1.6020017298900284, + "grad_norm": 1.3710437918045983, + "learning_rate": 2.0961968943013742e-06, + "loss": 0.2448965162038803, + "step": 3242 + }, + { + "epoch": 1.6024959841838626, + "grad_norm": 1.3544107087641921, + "learning_rate": 2.0911940936462794e-06, + "loss": 0.23486846685409546, + "step": 3243 + }, + { + "epoch": 1.6029902384776968, + "grad_norm": 1.576868207611872, + "learning_rate": 2.0861965727970045e-06, + "loss": 0.2470572590827942, + "step": 3244 + }, + { + "epoch": 1.603484492771531, + "grad_norm": 1.3371604672673962, + "learning_rate": 2.0812043350898226e-06, + "loss": 0.283765971660614, + "step": 3245 + }, + { + "epoch": 1.6039787470653653, + "grad_norm": 1.3089763025714083, + "learning_rate": 2.076217383857484e-06, + "loss": 0.24943199753761292, + "step": 3246 + }, + { + "epoch": 1.6044730013591995, + "grad_norm": 1.2872721191375163, + "learning_rate": 2.0712357224291966e-06, + "loss": 0.22150146961212158, + "step": 3247 + }, + { + "epoch": 1.6049672556530334, + "grad_norm": 1.3204310548386595, + "learning_rate": 2.0662593541306563e-06, + "loss": 0.2610163390636444, + "step": 3248 + }, + { + "epoch": 1.6054615099468676, + "grad_norm": 1.243779369506435, + "learning_rate": 2.0612882822840154e-06, + "loss": 0.22789397835731506, + "step": 3249 + }, + { + "epoch": 1.6059557642407019, + "grad_norm": 1.3699765130937176, + "learning_rate": 2.056322510207882e-06, + "loss": 0.22956407070159912, + "step": 3250 + }, + { + "epoch": 1.606450018534536, + "grad_norm": 1.3752485526796745, + "learning_rate": 2.051362041217341e-06, + "loss": 0.2579299509525299, + "step": 3251 + }, + { + "epoch": 1.60694427282837, + "grad_norm": 1.449594870075983, + "learning_rate": 2.046406878623929e-06, + "loss": 0.24655218422412872, + "step": 3252 + }, + { + "epoch": 1.6074385271222043, + "grad_norm": 1.8413073723455704, + "learning_rate": 2.0414570257356415e-06, + "loss": 
0.2325882464647293, + "step": 3253 + }, + { + "epoch": 1.6079327814160385, + "grad_norm": 1.3704743037638702, + "learning_rate": 2.0365124858569294e-06, + "loss": 0.2678581476211548, + "step": 3254 + }, + { + "epoch": 1.6084270357098727, + "grad_norm": 1.3329052595945479, + "learning_rate": 2.0315732622886976e-06, + "loss": 0.23200136423110962, + "step": 3255 + }, + { + "epoch": 1.6089212900037069, + "grad_norm": 1.3902434854443921, + "learning_rate": 2.0266393583283015e-06, + "loss": 0.24957536160945892, + "step": 3256 + }, + { + "epoch": 1.609415544297541, + "grad_norm": 1.3655551679458238, + "learning_rate": 2.0217107772695467e-06, + "loss": 0.2506657540798187, + "step": 3257 + }, + { + "epoch": 1.6099097985913753, + "grad_norm": 1.3749666602598227, + "learning_rate": 2.0167875224026788e-06, + "loss": 0.22255182266235352, + "step": 3258 + }, + { + "epoch": 1.6104040528852095, + "grad_norm": 1.222166534445823, + "learning_rate": 2.011869597014392e-06, + "loss": 0.2489611655473709, + "step": 3259 + }, + { + "epoch": 1.6108983071790437, + "grad_norm": 1.3283456479938487, + "learning_rate": 2.0069570043878305e-06, + "loss": 0.24808533489704132, + "step": 3260 + }, + { + "epoch": 1.611392561472878, + "grad_norm": 1.3963788000978605, + "learning_rate": 2.0020497478025635e-06, + "loss": 0.24013441801071167, + "step": 3261 + }, + { + "epoch": 1.6118868157667121, + "grad_norm": 1.4500389435817727, + "learning_rate": 1.997147830534608e-06, + "loss": 0.2813841998577118, + "step": 3262 + }, + { + "epoch": 1.612381070060546, + "grad_norm": 1.4817798618081903, + "learning_rate": 1.9922512558564154e-06, + "loss": 0.23727375268936157, + "step": 3263 + }, + { + "epoch": 1.6128753243543803, + "grad_norm": 1.3341145171932982, + "learning_rate": 1.9873600270368664e-06, + "loss": 0.2341655194759369, + "step": 3264 + }, + { + "epoch": 1.6133695786482145, + "grad_norm": 1.4911949653625025, + "learning_rate": 1.9824741473412768e-06, + "loss": 0.32069963216781616, + "step": 3265 + }, + 
{ + "epoch": 1.6138638329420487, + "grad_norm": 1.4329702924332965, + "learning_rate": 1.977593620031393e-06, + "loss": 0.2414681762456894, + "step": 3266 + }, + { + "epoch": 1.6143580872358827, + "grad_norm": 1.4261430753271709, + "learning_rate": 1.9727184483653793e-06, + "loss": 0.25517842173576355, + "step": 3267 + }, + { + "epoch": 1.614852341529717, + "grad_norm": 1.4664579571771421, + "learning_rate": 1.967848635597831e-06, + "loss": 0.28264889121055603, + "step": 3268 + }, + { + "epoch": 1.6153465958235511, + "grad_norm": 1.2722226456356633, + "learning_rate": 1.962984184979774e-06, + "loss": 0.24543075263500214, + "step": 3269 + }, + { + "epoch": 1.6158408501173853, + "grad_norm": 1.3971489540759634, + "learning_rate": 1.9581250997586366e-06, + "loss": 0.2770763039588928, + "step": 3270 + }, + { + "epoch": 1.6163351044112195, + "grad_norm": 1.362601366326608, + "learning_rate": 1.953271383178278e-06, + "loss": 0.2521423101425171, + "step": 3271 + }, + { + "epoch": 1.6168293587050537, + "grad_norm": 1.262726405313237, + "learning_rate": 1.9484230384789702e-06, + "loss": 0.2402455359697342, + "step": 3272 + }, + { + "epoch": 1.617323612998888, + "grad_norm": 1.4929089203163604, + "learning_rate": 1.9435800688974005e-06, + "loss": 0.2947021424770355, + "step": 3273 + }, + { + "epoch": 1.6178178672927221, + "grad_norm": 1.2382587228414774, + "learning_rate": 1.938742477666663e-06, + "loss": 0.22238701581954956, + "step": 3274 + }, + { + "epoch": 1.6183121215865564, + "grad_norm": 1.2835510888376274, + "learning_rate": 1.933910268016269e-06, + "loss": 0.25475019216537476, + "step": 3275 + }, + { + "epoch": 1.6188063758803906, + "grad_norm": 1.2545564646453307, + "learning_rate": 1.929083443172125e-06, + "loss": 0.2316315472126007, + "step": 3276 + }, + { + "epoch": 1.6193006301742248, + "grad_norm": 1.2739392933893041, + "learning_rate": 1.9242620063565598e-06, + "loss": 0.24977952241897583, + "step": 3277 + }, + { + "epoch": 1.619794884468059, + "grad_norm": 
1.3712132397422443, + "learning_rate": 1.9194459607882887e-06, + "loss": 0.24006152153015137, + "step": 3278 + }, + { + "epoch": 1.620289138761893, + "grad_norm": 1.2866259343493134, + "learning_rate": 1.9146353096824366e-06, + "loss": 0.26050522923469543, + "step": 3279 + }, + { + "epoch": 1.6207833930557272, + "grad_norm": 1.338449999730035, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2698773443698883, + "step": 3280 + }, + { + "epoch": 1.6212776473495614, + "grad_norm": 1.480404977138073, + "learning_rate": 1.9050302037004765e-06, + "loss": 0.2627784013748169, + "step": 3281 + }, + { + "epoch": 1.6217719016433956, + "grad_norm": 1.3335627547093958, + "learning_rate": 1.900235755236599e-06, + "loss": 0.24261148273944855, + "step": 3282 + }, + { + "epoch": 1.6222661559372296, + "grad_norm": 1.347149973540751, + "learning_rate": 1.8954467140596023e-06, + "loss": 0.24689635634422302, + "step": 3283 + }, + { + "epoch": 1.6227604102310638, + "grad_norm": 1.4586477344669697, + "learning_rate": 1.890663083366574e-06, + "loss": 0.2885867953300476, + "step": 3284 + }, + { + "epoch": 1.623254664524898, + "grad_norm": 1.2981242679817548, + "learning_rate": 1.8858848663510066e-06, + "loss": 0.2624407112598419, + "step": 3285 + }, + { + "epoch": 1.6237489188187322, + "grad_norm": 1.4544775837390882, + "learning_rate": 1.881112066202767e-06, + "loss": 0.27705928683280945, + "step": 3286 + }, + { + "epoch": 1.6242431731125664, + "grad_norm": 1.4465119903360202, + "learning_rate": 1.8763446861081058e-06, + "loss": 0.26406094431877136, + "step": 3287 + }, + { + "epoch": 1.6247374274064006, + "grad_norm": 1.3239739188563808, + "learning_rate": 1.8715827292496557e-06, + "loss": 0.26495790481567383, + "step": 3288 + }, + { + "epoch": 1.6252316817002348, + "grad_norm": 1.419298583557058, + "learning_rate": 1.8668261988064406e-06, + "loss": 0.24995195865631104, + "step": 3289 + }, + { + "epoch": 1.625725935994069, + "grad_norm": 1.4058286500391235, + "learning_rate": 
1.8620750979538437e-06, + "loss": 0.23043034970760345, + "step": 3290 + }, + { + "epoch": 1.6262201902879032, + "grad_norm": 1.3959905154788135, + "learning_rate": 1.8573294298636334e-06, + "loss": 0.2590731978416443, + "step": 3291 + }, + { + "epoch": 1.6267144445817374, + "grad_norm": 1.3919450960931963, + "learning_rate": 1.8525891977039557e-06, + "loss": 0.24246811866760254, + "step": 3292 + }, + { + "epoch": 1.6272086988755716, + "grad_norm": 1.2790623939923147, + "learning_rate": 1.847854404639311e-06, + "loss": 0.2386825680732727, + "step": 3293 + }, + { + "epoch": 1.6277029531694056, + "grad_norm": 1.3168324939527787, + "learning_rate": 1.843125053830588e-06, + "loss": 0.2243885099887848, + "step": 3294 + }, + { + "epoch": 1.6281972074632398, + "grad_norm": 1.264397606173487, + "learning_rate": 1.838401148435035e-06, + "loss": 0.24984796345233917, + "step": 3295 + }, + { + "epoch": 1.628691461757074, + "grad_norm": 1.3015406971863621, + "learning_rate": 1.8336826916062568e-06, + "loss": 0.22784638404846191, + "step": 3296 + }, + { + "epoch": 1.6291857160509082, + "grad_norm": 1.4178841831435534, + "learning_rate": 1.828969686494232e-06, + "loss": 0.24812597036361694, + "step": 3297 + }, + { + "epoch": 1.6296799703447422, + "grad_norm": 1.2132930880582795, + "learning_rate": 1.8242621362452939e-06, + "loss": 0.234031543135643, + "step": 3298 + }, + { + "epoch": 1.6301742246385764, + "grad_norm": 1.251471335677166, + "learning_rate": 1.8195600440021377e-06, + "loss": 0.22455371916294098, + "step": 3299 + }, + { + "epoch": 1.6306684789324106, + "grad_norm": 1.2488121980955387, + "learning_rate": 1.8148634129038113e-06, + "loss": 0.22605910897254944, + "step": 3300 + }, + { + "epoch": 1.6311627332262448, + "grad_norm": 1.3484568896035969, + "learning_rate": 1.8101722460857184e-06, + "loss": 0.2527684271335602, + "step": 3301 + }, + { + "epoch": 1.631656987520079, + "grad_norm": 1.355365003110194, + "learning_rate": 1.8054865466796167e-06, + "loss": 
0.24625766277313232, + "step": 3302 + }, + { + "epoch": 1.6321512418139132, + "grad_norm": 1.7061568076136007, + "learning_rate": 1.8008063178136125e-06, + "loss": 0.31236231327056885, + "step": 3303 + }, + { + "epoch": 1.6326454961077475, + "grad_norm": 1.3433390649211776, + "learning_rate": 1.7961315626121566e-06, + "loss": 0.21256005764007568, + "step": 3304 + }, + { + "epoch": 1.6331397504015817, + "grad_norm": 1.5039173087965194, + "learning_rate": 1.7914622841960482e-06, + "loss": 0.25238949060440063, + "step": 3305 + }, + { + "epoch": 1.6336340046954159, + "grad_norm": 1.3709723014330413, + "learning_rate": 1.7867984856824382e-06, + "loss": 0.29630619287490845, + "step": 3306 + }, + { + "epoch": 1.63412825898925, + "grad_norm": 1.3891654533842075, + "learning_rate": 1.782140170184804e-06, + "loss": 0.26159363985061646, + "step": 3307 + }, + { + "epoch": 1.6346225132830843, + "grad_norm": 1.2884457367333761, + "learning_rate": 1.7774873408129733e-06, + "loss": 0.22361448407173157, + "step": 3308 + }, + { + "epoch": 1.6351167675769185, + "grad_norm": 1.410142665529872, + "learning_rate": 1.7728400006731083e-06, + "loss": 0.23890942335128784, + "step": 3309 + }, + { + "epoch": 1.6356110218707525, + "grad_norm": 1.3147986477314286, + "learning_rate": 1.7681981528677073e-06, + "loss": 0.23067504167556763, + "step": 3310 + }, + { + "epoch": 1.6361052761645867, + "grad_norm": 1.4202307068972662, + "learning_rate": 1.7635618004956012e-06, + "loss": 0.24790561199188232, + "step": 3311 + }, + { + "epoch": 1.6365995304584209, + "grad_norm": 1.3183461895569366, + "learning_rate": 1.7589309466519556e-06, + "loss": 0.2590476870536804, + "step": 3312 + }, + { + "epoch": 1.637093784752255, + "grad_norm": 1.4033807602679105, + "learning_rate": 1.754305594428254e-06, + "loss": 0.26833316683769226, + "step": 3313 + }, + { + "epoch": 1.637588039046089, + "grad_norm": 1.2949590395956057, + "learning_rate": 1.749685746912323e-06, + "loss": 0.23390671610832214, + "step": 3314 + }, 
+ { + "epoch": 1.6380822933399233, + "grad_norm": 1.350070481785481, + "learning_rate": 1.7450714071883079e-06, + "loss": 0.2760172188282013, + "step": 3315 + }, + { + "epoch": 1.6385765476337575, + "grad_norm": 1.387338184553767, + "learning_rate": 1.7404625783366703e-06, + "loss": 0.255672812461853, + "step": 3316 + }, + { + "epoch": 1.6390708019275917, + "grad_norm": 1.4073549622144716, + "learning_rate": 1.7358592634342008e-06, + "loss": 0.26336947083473206, + "step": 3317 + }, + { + "epoch": 1.639565056221426, + "grad_norm": 1.2609217918610456, + "learning_rate": 1.7312614655540071e-06, + "loss": 0.2308199107646942, + "step": 3318 + }, + { + "epoch": 1.64005931051526, + "grad_norm": 1.335188741822115, + "learning_rate": 1.7266691877655129e-06, + "loss": 0.24762676656246185, + "step": 3319 + }, + { + "epoch": 1.6405535648090943, + "grad_norm": 1.3287358421539026, + "learning_rate": 1.7220824331344577e-06, + "loss": 0.2175157219171524, + "step": 3320 + }, + { + "epoch": 1.6410478191029285, + "grad_norm": 1.3635707435478155, + "learning_rate": 1.7175012047228956e-06, + "loss": 0.24319039285182953, + "step": 3321 + }, + { + "epoch": 1.6415420733967627, + "grad_norm": 1.2272267263054326, + "learning_rate": 1.7129255055891813e-06, + "loss": 0.21708521246910095, + "step": 3322 + }, + { + "epoch": 1.642036327690597, + "grad_norm": 1.4404881849035673, + "learning_rate": 1.7083553387879969e-06, + "loss": 0.28576910495758057, + "step": 3323 + }, + { + "epoch": 1.6425305819844311, + "grad_norm": 1.3120467826579518, + "learning_rate": 1.703790707370313e-06, + "loss": 0.2664312720298767, + "step": 3324 + }, + { + "epoch": 1.6430248362782651, + "grad_norm": 1.5950926505285568, + "learning_rate": 1.6992316143834142e-06, + "loss": 0.23930951952934265, + "step": 3325 + }, + { + "epoch": 1.6435190905720993, + "grad_norm": 1.3985303284465023, + "learning_rate": 1.694678062870886e-06, + "loss": 0.2741955518722534, + "step": 3326 + }, + { + "epoch": 1.6440133448659335, + 
"grad_norm": 1.2830935776841221, + "learning_rate": 1.6901300558726142e-06, + "loss": 0.25177690386772156, + "step": 3327 + }, + { + "epoch": 1.6445075991597677, + "grad_norm": 1.4111945712412088, + "learning_rate": 1.6855875964247837e-06, + "loss": 0.26517611742019653, + "step": 3328 + }, + { + "epoch": 1.6450018534536017, + "grad_norm": 1.227994601145186, + "learning_rate": 1.6810506875598776e-06, + "loss": 0.2294573187828064, + "step": 3329 + }, + { + "epoch": 1.645496107747436, + "grad_norm": 1.3101987526620804, + "learning_rate": 1.6765193323066653e-06, + "loss": 0.23062998056411743, + "step": 3330 + }, + { + "epoch": 1.6459903620412701, + "grad_norm": 1.4687005380243534, + "learning_rate": 1.6719935336902205e-06, + "loss": 0.3047422468662262, + "step": 3331 + }, + { + "epoch": 1.6464846163351043, + "grad_norm": 1.4214345840675306, + "learning_rate": 1.6674732947319017e-06, + "loss": 0.2715694308280945, + "step": 3332 + }, + { + "epoch": 1.6469788706289386, + "grad_norm": 1.3486732362780178, + "learning_rate": 1.6629586184493519e-06, + "loss": 0.20359721779823303, + "step": 3333 + }, + { + "epoch": 1.6474731249227728, + "grad_norm": 1.256842666883273, + "learning_rate": 1.6584495078565045e-06, + "loss": 0.20083262026309967, + "step": 3334 + }, + { + "epoch": 1.647967379216607, + "grad_norm": 1.2824441486710174, + "learning_rate": 1.6539459659635848e-06, + "loss": 0.2274707555770874, + "step": 3335 + }, + { + "epoch": 1.6484616335104412, + "grad_norm": 1.4170790489583633, + "learning_rate": 1.6494479957770847e-06, + "loss": 0.2654137909412384, + "step": 3336 + }, + { + "epoch": 1.6489558878042754, + "grad_norm": 1.2207871831065553, + "learning_rate": 1.644955600299788e-06, + "loss": 0.24672716856002808, + "step": 3337 + }, + { + "epoch": 1.6494501420981096, + "grad_norm": 2.7206661248050494, + "learning_rate": 1.640468782530753e-06, + "loss": 0.21563802659511566, + "step": 3338 + }, + { + "epoch": 1.6499443963919438, + "grad_norm": 1.2772497258385302, + 
"learning_rate": 1.6359875454653151e-06, + "loss": 0.22986169159412384, + "step": 3339 + }, + { + "epoch": 1.650438650685778, + "grad_norm": 1.1914212857874291, + "learning_rate": 1.6315118920950857e-06, + "loss": 0.22981731593608856, + "step": 3340 + }, + { + "epoch": 1.650932904979612, + "grad_norm": 1.423180347857553, + "learning_rate": 1.6270418254079478e-06, + "loss": 0.25922536849975586, + "step": 3341 + }, + { + "epoch": 1.6514271592734462, + "grad_norm": 1.3808711162643625, + "learning_rate": 1.6225773483880503e-06, + "loss": 0.23273468017578125, + "step": 3342 + }, + { + "epoch": 1.6519214135672804, + "grad_norm": 1.3019728240659525, + "learning_rate": 1.6181184640158165e-06, + "loss": 0.22988896071910858, + "step": 3343 + }, + { + "epoch": 1.6524156678611144, + "grad_norm": 1.3674976753844925, + "learning_rate": 1.6136651752679333e-06, + "loss": 0.2628646790981293, + "step": 3344 + }, + { + "epoch": 1.6529099221549486, + "grad_norm": 1.3498513177046836, + "learning_rate": 1.6092174851173526e-06, + "loss": 0.24670086801052094, + "step": 3345 + }, + { + "epoch": 1.6534041764487828, + "grad_norm": 1.3175919767027275, + "learning_rate": 1.6047753965332902e-06, + "loss": 0.27845436334609985, + "step": 3346 + }, + { + "epoch": 1.653898430742617, + "grad_norm": 1.30200656487082, + "learning_rate": 1.6003389124812185e-06, + "loss": 0.25297483801841736, + "step": 3347 + }, + { + "epoch": 1.6543926850364512, + "grad_norm": 1.237195636484559, + "learning_rate": 1.595908035922873e-06, + "loss": 0.18876859545707703, + "step": 3348 + }, + { + "epoch": 1.6548869393302854, + "grad_norm": 1.3417621492525376, + "learning_rate": 1.591482769816246e-06, + "loss": 0.23852673172950745, + "step": 3349 + }, + { + "epoch": 1.6553811936241196, + "grad_norm": 1.3350614987774176, + "learning_rate": 1.587063117115576e-06, + "loss": 0.2569701373577118, + "step": 3350 + }, + { + "epoch": 1.6558754479179538, + "grad_norm": 1.354350083762125, + "learning_rate": 1.582649080771359e-06, + 
"loss": 0.29305699467658997, + "step": 3351 + }, + { + "epoch": 1.656369702211788, + "grad_norm": 1.42534989112271, + "learning_rate": 1.5782406637303527e-06, + "loss": 0.28942832350730896, + "step": 3352 + }, + { + "epoch": 1.6568639565056222, + "grad_norm": 1.351062882636418, + "learning_rate": 1.5738378689355439e-06, + "loss": 0.27491068840026855, + "step": 3353 + }, + { + "epoch": 1.6573582107994564, + "grad_norm": 1.4736732865815314, + "learning_rate": 1.569440699326179e-06, + "loss": 0.26730844378471375, + "step": 3354 + }, + { + "epoch": 1.6578524650932907, + "grad_norm": 1.3194299490413177, + "learning_rate": 1.5650491578377458e-06, + "loss": 0.23610982298851013, + "step": 3355 + }, + { + "epoch": 1.6583467193871246, + "grad_norm": 1.5894671595119023, + "learning_rate": 1.5606632474019734e-06, + "loss": 0.26817262172698975, + "step": 3356 + }, + { + "epoch": 1.6588409736809588, + "grad_norm": 1.4847304906222882, + "learning_rate": 1.556282970946833e-06, + "loss": 0.2403341382741928, + "step": 3357 + }, + { + "epoch": 1.659335227974793, + "grad_norm": 1.4109665373138245, + "learning_rate": 1.5519083313965378e-06, + "loss": 0.24433058500289917, + "step": 3358 + }, + { + "epoch": 1.6598294822686273, + "grad_norm": 1.2685951523616033, + "learning_rate": 1.5475393316715282e-06, + "loss": 0.2526702582836151, + "step": 3359 + }, + { + "epoch": 1.6603237365624612, + "grad_norm": 1.3373930264060108, + "learning_rate": 1.543175974688491e-06, + "loss": 0.24032334983348846, + "step": 3360 + }, + { + "epoch": 1.6608179908562954, + "grad_norm": 1.3759465001084996, + "learning_rate": 1.5388182633603433e-06, + "loss": 0.27770349383354187, + "step": 3361 + }, + { + "epoch": 1.6613122451501297, + "grad_norm": 1.5590715119269358, + "learning_rate": 1.534466200596224e-06, + "loss": 0.26002752780914307, + "step": 3362 + }, + { + "epoch": 1.6618064994439639, + "grad_norm": 1.867324678142589, + "learning_rate": 1.5301197893015129e-06, + "loss": 0.2707037329673767, + "step": 3363 
+ }, + { + "epoch": 1.662300753737798, + "grad_norm": 1.3300911116600942, + "learning_rate": 1.52577903237781e-06, + "loss": 0.27249252796173096, + "step": 3364 + }, + { + "epoch": 1.6627950080316323, + "grad_norm": 1.341030721831506, + "learning_rate": 1.5214439327229425e-06, + "loss": 0.22495020925998688, + "step": 3365 + }, + { + "epoch": 1.6632892623254665, + "grad_norm": 1.4580410293752506, + "learning_rate": 1.5171144932309622e-06, + "loss": 0.23561973869800568, + "step": 3366 + }, + { + "epoch": 1.6637835166193007, + "grad_norm": 1.4580927261417298, + "learning_rate": 1.512790716792143e-06, + "loss": 0.2689869701862335, + "step": 3367 + }, + { + "epoch": 1.664277770913135, + "grad_norm": 1.2734577307213573, + "learning_rate": 1.5084726062929688e-06, + "loss": 0.22249455749988556, + "step": 3368 + }, + { + "epoch": 1.664772025206969, + "grad_norm": 1.444110335390912, + "learning_rate": 1.5041601646161585e-06, + "loss": 0.24586130678653717, + "step": 3369 + }, + { + "epoch": 1.6652662795008033, + "grad_norm": 1.3250583547488792, + "learning_rate": 1.499853394640629e-06, + "loss": 0.2549409568309784, + "step": 3370 + }, + { + "epoch": 1.6657605337946373, + "grad_norm": 1.4135792596464256, + "learning_rate": 1.4955522992415206e-06, + "loss": 0.2517774999141693, + "step": 3371 + }, + { + "epoch": 1.6662547880884715, + "grad_norm": 1.6132674993246225, + "learning_rate": 1.491256881290184e-06, + "loss": 0.2627662420272827, + "step": 3372 + }, + { + "epoch": 1.6667490423823057, + "grad_norm": 1.250156659660365, + "learning_rate": 1.4869671436541788e-06, + "loss": 0.25203272700309753, + "step": 3373 + }, + { + "epoch": 1.66724329667614, + "grad_norm": 1.3035778741812132, + "learning_rate": 1.482683089197271e-06, + "loss": 0.2206164300441742, + "step": 3374 + }, + { + "epoch": 1.667737550969974, + "grad_norm": 1.4034071560123977, + "learning_rate": 1.4784047207794383e-06, + "loss": 0.2551203966140747, + "step": 3375 + }, + { + "epoch": 1.668231805263808, + 
"grad_norm": 1.4247468939554981, + "learning_rate": 1.4741320412568505e-06, + "loss": 0.2592264711856842, + "step": 3376 + }, + { + "epoch": 1.6687260595576423, + "grad_norm": 1.3609833066581156, + "learning_rate": 1.4698650534818936e-06, + "loss": 0.25902658700942993, + "step": 3377 + }, + { + "epoch": 1.6692203138514765, + "grad_norm": 1.5283083080675575, + "learning_rate": 1.4656037603031491e-06, + "loss": 0.2685459852218628, + "step": 3378 + }, + { + "epoch": 1.6697145681453107, + "grad_norm": 1.2083368696295387, + "learning_rate": 1.4613481645653914e-06, + "loss": 0.21010839939117432, + "step": 3379 + }, + { + "epoch": 1.670208822439145, + "grad_norm": 1.3019618254178054, + "learning_rate": 1.4570982691095925e-06, + "loss": 0.23318082094192505, + "step": 3380 + }, + { + "epoch": 1.6707030767329791, + "grad_norm": 1.346937478273973, + "learning_rate": 1.4528540767729315e-06, + "loss": 0.25045326352119446, + "step": 3381 + }, + { + "epoch": 1.6711973310268133, + "grad_norm": 1.5157571774504706, + "learning_rate": 1.4486155903887623e-06, + "loss": 0.2436288446187973, + "step": 3382 + }, + { + "epoch": 1.6716915853206475, + "grad_norm": 1.2766580343897052, + "learning_rate": 1.444382812786641e-06, + "loss": 0.20454761385917664, + "step": 3383 + }, + { + "epoch": 1.6721858396144818, + "grad_norm": 1.3207693230256567, + "learning_rate": 1.4401557467923089e-06, + "loss": 0.24906963109970093, + "step": 3384 + }, + { + "epoch": 1.672680093908316, + "grad_norm": 1.3391460516330347, + "learning_rate": 1.435934395227695e-06, + "loss": 0.2552015483379364, + "step": 3385 + }, + { + "epoch": 1.6731743482021502, + "grad_norm": 1.3523733680416914, + "learning_rate": 1.4317187609109129e-06, + "loss": 0.2393915057182312, + "step": 3386 + }, + { + "epoch": 1.6736686024959841, + "grad_norm": 1.370539563215592, + "learning_rate": 1.4275088466562625e-06, + "loss": 0.2607477009296417, + "step": 3387 + }, + { + "epoch": 1.6741628567898184, + "grad_norm": 1.3296614147148798, + 
"learning_rate": 1.423304655274218e-06, + "loss": 0.23722632229328156, + "step": 3388 + }, + { + "epoch": 1.6746571110836526, + "grad_norm": 1.303256653854929, + "learning_rate": 1.4191061895714398e-06, + "loss": 0.2614964246749878, + "step": 3389 + }, + { + "epoch": 1.6751513653774868, + "grad_norm": 1.476448410559568, + "learning_rate": 1.4149134523507634e-06, + "loss": 0.2727823555469513, + "step": 3390 + }, + { + "epoch": 1.6756456196713208, + "grad_norm": 1.2739771939884463, + "learning_rate": 1.4107264464112003e-06, + "loss": 0.25176581740379333, + "step": 3391 + }, + { + "epoch": 1.676139873965155, + "grad_norm": 1.3087240197668597, + "learning_rate": 1.4065451745479352e-06, + "loss": 0.21339070796966553, + "step": 3392 + }, + { + "epoch": 1.6766341282589892, + "grad_norm": 1.449069234603101, + "learning_rate": 1.4023696395523267e-06, + "loss": 0.26540419459342957, + "step": 3393 + }, + { + "epoch": 1.6771283825528234, + "grad_norm": 1.3788929945945605, + "learning_rate": 1.3981998442119017e-06, + "loss": 0.2621360421180725, + "step": 3394 + }, + { + "epoch": 1.6776226368466576, + "grad_norm": 1.3149158272362809, + "learning_rate": 1.3940357913103576e-06, + "loss": 0.2578747570514679, + "step": 3395 + }, + { + "epoch": 1.6781168911404918, + "grad_norm": 1.3223117210430684, + "learning_rate": 1.3898774836275531e-06, + "loss": 0.26105010509490967, + "step": 3396 + }, + { + "epoch": 1.678611145434326, + "grad_norm": 1.277709690267506, + "learning_rate": 1.3857249239395143e-06, + "loss": 0.2221919298171997, + "step": 3397 + }, + { + "epoch": 1.6791053997281602, + "grad_norm": 1.3742911888899896, + "learning_rate": 1.3815781150184382e-06, + "loss": 0.2498932033777237, + "step": 3398 + }, + { + "epoch": 1.6795996540219944, + "grad_norm": 1.3631278461436225, + "learning_rate": 1.377437059632668e-06, + "loss": 0.29306796193122864, + "step": 3399 + }, + { + "epoch": 1.6800939083158286, + "grad_norm": 1.41106483401144, + "learning_rate": 1.3733017605467158e-06, + 
"loss": 0.23804892599582672, + "step": 3400 + }, + { + "epoch": 1.6805881626096628, + "grad_norm": 1.264388446305106, + "learning_rate": 1.3691722205212465e-06, + "loss": 0.18528425693511963, + "step": 3401 + }, + { + "epoch": 1.6810824169034968, + "grad_norm": 1.434400904695952, + "learning_rate": 1.365048442313085e-06, + "loss": 0.257534921169281, + "step": 3402 + }, + { + "epoch": 1.681576671197331, + "grad_norm": 1.390183210111369, + "learning_rate": 1.3609304286752034e-06, + "loss": 0.2519993782043457, + "step": 3403 + }, + { + "epoch": 1.6820709254911652, + "grad_norm": 1.5041703905686798, + "learning_rate": 1.3568181823567328e-06, + "loss": 0.27830445766448975, + "step": 3404 + }, + { + "epoch": 1.6825651797849994, + "grad_norm": 1.3496130761993563, + "learning_rate": 1.3527117061029438e-06, + "loss": 0.22532883286476135, + "step": 3405 + }, + { + "epoch": 1.6830594340788334, + "grad_norm": 1.3484913124474047, + "learning_rate": 1.3486110026552668e-06, + "loss": 0.23230011761188507, + "step": 3406 + }, + { + "epoch": 1.6835536883726676, + "grad_norm": 1.320791018685261, + "learning_rate": 1.3445160747512743e-06, + "loss": 0.24105653166770935, + "step": 3407 + }, + { + "epoch": 1.6840479426665018, + "grad_norm": 1.5077644423875391, + "learning_rate": 1.340426925124676e-06, + "loss": 0.2946394681930542, + "step": 3408 + }, + { + "epoch": 1.684542196960336, + "grad_norm": 1.403422513607122, + "learning_rate": 1.3363435565053319e-06, + "loss": 0.2682989239692688, + "step": 3409 + }, + { + "epoch": 1.6850364512541702, + "grad_norm": 1.3363195283881322, + "learning_rate": 1.332265971619241e-06, + "loss": 0.2219456285238266, + "step": 3410 + }, + { + "epoch": 1.6855307055480044, + "grad_norm": 1.2440577869208935, + "learning_rate": 1.3281941731885396e-06, + "loss": 0.22532151639461517, + "step": 3411 + }, + { + "epoch": 1.6860249598418386, + "grad_norm": 1.3951142777226702, + "learning_rate": 1.324128163931504e-06, + "loss": 0.24166807532310486, + "step": 3412 + }, 
+ { + "epoch": 1.6865192141356729, + "grad_norm": 1.8803758040895027, + "learning_rate": 1.3200679465625453e-06, + "loss": 0.25514671206474304, + "step": 3413 + }, + { + "epoch": 1.687013468429507, + "grad_norm": 1.4161288294493581, + "learning_rate": 1.3160135237922011e-06, + "loss": 0.263123482465744, + "step": 3414 + }, + { + "epoch": 1.6875077227233413, + "grad_norm": 1.3692510048196695, + "learning_rate": 1.3119648983271527e-06, + "loss": 0.23763976991176605, + "step": 3415 + }, + { + "epoch": 1.6880019770171755, + "grad_norm": 1.4514594135261416, + "learning_rate": 1.3079220728701991e-06, + "loss": 0.28645598888397217, + "step": 3416 + }, + { + "epoch": 1.6884962313110097, + "grad_norm": 1.3145652794970974, + "learning_rate": 1.303885050120275e-06, + "loss": 0.2269624024629593, + "step": 3417 + }, + { + "epoch": 1.6889904856048437, + "grad_norm": 1.2380861054344243, + "learning_rate": 1.2998538327724386e-06, + "loss": 0.23601466417312622, + "step": 3418 + }, + { + "epoch": 1.6894847398986779, + "grad_norm": 1.4253359182592056, + "learning_rate": 1.2958284235178743e-06, + "loss": 0.2246169149875641, + "step": 3419 + }, + { + "epoch": 1.689978994192512, + "grad_norm": 1.497489718348998, + "learning_rate": 1.2918088250438865e-06, + "loss": 0.26519715785980225, + "step": 3420 + }, + { + "epoch": 1.6904732484863463, + "grad_norm": 1.443915314302877, + "learning_rate": 1.2877950400339046e-06, + "loss": 0.2590267062187195, + "step": 3421 + }, + { + "epoch": 1.6909675027801803, + "grad_norm": 1.3941822393799335, + "learning_rate": 1.2837870711674672e-06, + "loss": 0.2535945773124695, + "step": 3422 + }, + { + "epoch": 1.6914617570740145, + "grad_norm": 1.3833358145204437, + "learning_rate": 1.279784921120244e-06, + "loss": 0.21907874941825867, + "step": 3423 + }, + { + "epoch": 1.6919560113678487, + "grad_norm": 1.3775789573220893, + "learning_rate": 1.2757885925640124e-06, + "loss": 0.23314553499221802, + "step": 3424 + }, + { + "epoch": 1.6924502656616829, + 
"grad_norm": 1.2335650824399806, + "learning_rate": 1.2717980881666615e-06, + "loss": 0.2288433313369751, + "step": 3425 + }, + { + "epoch": 1.692944519955517, + "grad_norm": 1.3218922014839134, + "learning_rate": 1.2678134105921924e-06, + "loss": 0.2285449206829071, + "step": 3426 + }, + { + "epoch": 1.6934387742493513, + "grad_norm": 1.4061495134031399, + "learning_rate": 1.2638345625007287e-06, + "loss": 0.2898653447628021, + "step": 3427 + }, + { + "epoch": 1.6939330285431855, + "grad_norm": 1.3140964049835469, + "learning_rate": 1.2598615465484831e-06, + "loss": 0.23574519157409668, + "step": 3428 + }, + { + "epoch": 1.6944272828370197, + "grad_norm": 1.8163323929078987, + "learning_rate": 1.2558943653877887e-06, + "loss": 0.23385417461395264, + "step": 3429 + }, + { + "epoch": 1.694921537130854, + "grad_norm": 1.4332956021988026, + "learning_rate": 1.2519330216670766e-06, + "loss": 0.2555482089519501, + "step": 3430 + }, + { + "epoch": 1.6954157914246881, + "grad_norm": 1.3005186125236943, + "learning_rate": 1.247977518030885e-06, + "loss": 0.22221535444259644, + "step": 3431 + }, + { + "epoch": 1.6959100457185223, + "grad_norm": 1.2645213358789251, + "learning_rate": 1.2440278571198516e-06, + "loss": 0.21753090620040894, + "step": 3432 + }, + { + "epoch": 1.6964043000123563, + "grad_norm": 1.3199124302473737, + "learning_rate": 1.240084041570716e-06, + "loss": 0.2352944314479828, + "step": 3433 + }, + { + "epoch": 1.6968985543061905, + "grad_norm": 1.3019158889354874, + "learning_rate": 1.2361460740163045e-06, + "loss": 0.22581814229488373, + "step": 3434 + }, + { + "epoch": 1.6973928086000247, + "grad_norm": 1.5051457985045136, + "learning_rate": 1.2322139570855596e-06, + "loss": 0.28703421354293823, + "step": 3435 + }, + { + "epoch": 1.697887062893859, + "grad_norm": 1.2466294121854475, + "learning_rate": 1.2282876934034972e-06, + "loss": 0.21528789401054382, + "step": 3436 + }, + { + "epoch": 1.698381317187693, + "grad_norm": 1.3714652202926056, + 
"learning_rate": 1.2243672855912393e-06, + "loss": 0.2675422430038452, + "step": 3437 + }, + { + "epoch": 1.6988755714815271, + "grad_norm": 1.4468798550658835, + "learning_rate": 1.2204527362659913e-06, + "loss": 0.26681527495384216, + "step": 3438 + }, + { + "epoch": 1.6993698257753613, + "grad_norm": 1.6692863707132455, + "learning_rate": 1.216544048041054e-06, + "loss": 0.2436470091342926, + "step": 3439 + }, + { + "epoch": 1.6998640800691955, + "grad_norm": 1.3471564011899657, + "learning_rate": 1.212641223525809e-06, + "loss": 0.25458425283432007, + "step": 3440 + }, + { + "epoch": 1.7003583343630297, + "grad_norm": 1.5076141037655715, + "learning_rate": 1.2087442653257286e-06, + "loss": 0.24890559911727905, + "step": 3441 + }, + { + "epoch": 1.700852588656864, + "grad_norm": 1.2935321774740525, + "learning_rate": 1.2048531760423642e-06, + "loss": 0.26031816005706787, + "step": 3442 + }, + { + "epoch": 1.7013468429506982, + "grad_norm": 1.2852726465517723, + "learning_rate": 1.200967958273349e-06, + "loss": 0.22184975445270538, + "step": 3443 + }, + { + "epoch": 1.7018410972445324, + "grad_norm": 1.4055101079653758, + "learning_rate": 1.1970886146124073e-06, + "loss": 0.2670953571796417, + "step": 3444 + }, + { + "epoch": 1.7023353515383666, + "grad_norm": 1.4509425159233789, + "learning_rate": 1.1932151476493247e-06, + "loss": 0.27950525283813477, + "step": 3445 + }, + { + "epoch": 1.7028296058322008, + "grad_norm": 1.177838308027136, + "learning_rate": 1.1893475599699766e-06, + "loss": 0.23257380723953247, + "step": 3446 + }, + { + "epoch": 1.703323860126035, + "grad_norm": 1.33833163811184, + "learning_rate": 1.1854858541563086e-06, + "loss": 0.2586575746536255, + "step": 3447 + }, + { + "epoch": 1.703818114419869, + "grad_norm": 1.4079485154063143, + "learning_rate": 1.1816300327863406e-06, + "loss": 0.2677457928657532, + "step": 3448 + }, + { + "epoch": 1.7043123687137032, + "grad_norm": 1.565618455451115, + "learning_rate": 1.1777800984341637e-06, + 
"loss": 0.29866284132003784, + "step": 3449 + }, + { + "epoch": 1.7048066230075374, + "grad_norm": 1.3858480302164131, + "learning_rate": 1.1739360536699397e-06, + "loss": 0.27279675006866455, + "step": 3450 + }, + { + "epoch": 1.7053008773013716, + "grad_norm": 1.4265301971817403, + "learning_rate": 1.1700979010598945e-06, + "loss": 0.25695672631263733, + "step": 3451 + }, + { + "epoch": 1.7057951315952056, + "grad_norm": 1.2548676263466874, + "learning_rate": 1.1662656431663278e-06, + "loss": 0.22578787803649902, + "step": 3452 + }, + { + "epoch": 1.7062893858890398, + "grad_norm": 1.2884557931863843, + "learning_rate": 1.1624392825476016e-06, + "loss": 0.1946491301059723, + "step": 3453 + }, + { + "epoch": 1.706783640182874, + "grad_norm": 1.7214838792794764, + "learning_rate": 1.158618821758134e-06, + "loss": 0.2099667191505432, + "step": 3454 + }, + { + "epoch": 1.7072778944767082, + "grad_norm": 1.3956932051100446, + "learning_rate": 1.1548042633484148e-06, + "loss": 0.22660428285598755, + "step": 3455 + }, + { + "epoch": 1.7077721487705424, + "grad_norm": 1.486801447510752, + "learning_rate": 1.1509956098649855e-06, + "loss": 0.27378255128860474, + "step": 3456 + }, + { + "epoch": 1.7082664030643766, + "grad_norm": 1.3265929348116055, + "learning_rate": 1.1471928638504504e-06, + "loss": 0.2209164947271347, + "step": 3457 + }, + { + "epoch": 1.7087606573582108, + "grad_norm": 1.4225246621575494, + "learning_rate": 1.1433960278434687e-06, + "loss": 0.24310322105884552, + "step": 3458 + }, + { + "epoch": 1.709254911652045, + "grad_norm": 1.408175906725771, + "learning_rate": 1.1396051043787526e-06, + "loss": 0.23209068179130554, + "step": 3459 + }, + { + "epoch": 1.7097491659458792, + "grad_norm": 1.3815567972930465, + "learning_rate": 1.1358200959870703e-06, + "loss": 0.2514454126358032, + "step": 3460 + }, + { + "epoch": 1.7102434202397134, + "grad_norm": 1.4417631759146625, + "learning_rate": 1.132041005195239e-06, + "loss": 0.2580721378326416, + "step": 
3461 + }, + { + "epoch": 1.7107376745335476, + "grad_norm": 1.3709268368925525, + "learning_rate": 1.1282678345261234e-06, + "loss": 0.26388949155807495, + "step": 3462 + }, + { + "epoch": 1.7112319288273818, + "grad_norm": 1.2783952905855267, + "learning_rate": 1.1245005864986402e-06, + "loss": 0.2194654643535614, + "step": 3463 + }, + { + "epoch": 1.7117261831212158, + "grad_norm": 1.2633121407835717, + "learning_rate": 1.1207392636277502e-06, + "loss": 0.2048814296722412, + "step": 3464 + }, + { + "epoch": 1.71222043741505, + "grad_norm": 1.33926020269927, + "learning_rate": 1.1169838684244584e-06, + "loss": 0.24165832996368408, + "step": 3465 + }, + { + "epoch": 1.7127146917088842, + "grad_norm": 1.3906329052137327, + "learning_rate": 1.1132344033958132e-06, + "loss": 0.2484482377767563, + "step": 3466 + }, + { + "epoch": 1.7132089460027184, + "grad_norm": 1.4564028814853938, + "learning_rate": 1.1094908710449048e-06, + "loss": 0.2406741827726364, + "step": 3467 + }, + { + "epoch": 1.7137032002965524, + "grad_norm": 1.4018531611252434, + "learning_rate": 1.1057532738708588e-06, + "loss": 0.2417721152305603, + "step": 3468 + }, + { + "epoch": 1.7141974545903866, + "grad_norm": 1.4560734194910743, + "learning_rate": 1.1020216143688446e-06, + "loss": 0.26304543018341064, + "step": 3469 + }, + { + "epoch": 1.7146917088842208, + "grad_norm": 1.476031518585943, + "learning_rate": 1.098295895030066e-06, + "loss": 0.30013689398765564, + "step": 3470 + }, + { + "epoch": 1.715185963178055, + "grad_norm": 1.3175345714713855, + "learning_rate": 1.0945761183417569e-06, + "loss": 0.21451817452907562, + "step": 3471 + }, + { + "epoch": 1.7156802174718893, + "grad_norm": 1.3300365419760627, + "learning_rate": 1.0908622867871854e-06, + "loss": 0.235377699136734, + "step": 3472 + }, + { + "epoch": 1.7161744717657235, + "grad_norm": 1.2866674867130445, + "learning_rate": 1.0871544028456594e-06, + "loss": 0.23560425639152527, + "step": 3473 + }, + { + "epoch": 1.7166687260595577, 
+ "grad_norm": 1.3385949926310057, + "learning_rate": 1.083452468992503e-06, + "loss": 0.2431229054927826, + "step": 3474 + }, + { + "epoch": 1.7171629803533919, + "grad_norm": 1.2089508133597444, + "learning_rate": 1.0797564876990762e-06, + "loss": 0.211553692817688, + "step": 3475 + }, + { + "epoch": 1.717657234647226, + "grad_norm": 1.3533177183735723, + "learning_rate": 1.0760664614327643e-06, + "loss": 0.23565953969955444, + "step": 3476 + }, + { + "epoch": 1.7181514889410603, + "grad_norm": 1.328162178864468, + "learning_rate": 1.0723823926569744e-06, + "loss": 0.2052966058254242, + "step": 3477 + }, + { + "epoch": 1.7186457432348945, + "grad_norm": 1.3067945675468369, + "learning_rate": 1.06870428383114e-06, + "loss": 0.24831204116344452, + "step": 3478 + }, + { + "epoch": 1.7191399975287285, + "grad_norm": 1.273169118321956, + "learning_rate": 1.0650321374107142e-06, + "loss": 0.24706462025642395, + "step": 3479 + }, + { + "epoch": 1.7196342518225627, + "grad_norm": 1.4211234189057285, + "learning_rate": 1.0613659558471644e-06, + "loss": 0.20845818519592285, + "step": 3480 + }, + { + "epoch": 1.720128506116397, + "grad_norm": 1.2323642708024432, + "learning_rate": 1.0577057415879887e-06, + "loss": 0.21599797904491425, + "step": 3481 + }, + { + "epoch": 1.720622760410231, + "grad_norm": 1.4618240857831881, + "learning_rate": 1.054051497076689e-06, + "loss": 0.2381049394607544, + "step": 3482 + }, + { + "epoch": 1.721117014704065, + "grad_norm": 1.3155008449637104, + "learning_rate": 1.0504032247527874e-06, + "loss": 0.22402817010879517, + "step": 3483 + }, + { + "epoch": 1.7216112689978993, + "grad_norm": 1.5409902580545625, + "learning_rate": 1.0467609270518186e-06, + "loss": 0.24406251311302185, + "step": 3484 + }, + { + "epoch": 1.7221055232917335, + "grad_norm": 1.339222294791023, + "learning_rate": 1.0431246064053291e-06, + "loss": 0.24388936161994934, + "step": 3485 + }, + { + "epoch": 1.7225997775855677, + "grad_norm": 1.3265412686691833, + 
"learning_rate": 1.0394942652408735e-06, + "loss": 0.26131671667099, + "step": 3486 + }, + { + "epoch": 1.723094031879402, + "grad_norm": 1.3718768259485188, + "learning_rate": 1.0358699059820188e-06, + "loss": 0.247392475605011, + "step": 3487 + }, + { + "epoch": 1.7235882861732361, + "grad_norm": 1.335920284358623, + "learning_rate": 1.0322515310483316e-06, + "loss": 0.22713768482208252, + "step": 3488 + }, + { + "epoch": 1.7240825404670703, + "grad_norm": 1.3821197244420464, + "learning_rate": 1.0286391428553854e-06, + "loss": 0.2544357180595398, + "step": 3489 + }, + { + "epoch": 1.7245767947609045, + "grad_norm": 1.260460911336476, + "learning_rate": 1.0250327438147678e-06, + "loss": 0.23186656832695007, + "step": 3490 + }, + { + "epoch": 1.7250710490547387, + "grad_norm": 1.1804266448755296, + "learning_rate": 1.0214323363340506e-06, + "loss": 0.20387035608291626, + "step": 3491 + }, + { + "epoch": 1.725565303348573, + "grad_norm": 1.4265943405789598, + "learning_rate": 1.017837922816819e-06, + "loss": 0.25391846895217896, + "step": 3492 + }, + { + "epoch": 1.7260595576424071, + "grad_norm": 1.2603447890118837, + "learning_rate": 1.014249505662649e-06, + "loss": 0.23214812576770782, + "step": 3493 + }, + { + "epoch": 1.7265538119362414, + "grad_norm": 1.5899981641866812, + "learning_rate": 1.0106670872671187e-06, + "loss": 0.31888365745544434, + "step": 3494 + }, + { + "epoch": 1.7270480662300753, + "grad_norm": 1.2907611357867346, + "learning_rate": 1.0070906700217998e-06, + "loss": 0.23372362554073334, + "step": 3495 + }, + { + "epoch": 1.7275423205239095, + "grad_norm": 1.2449017093435057, + "learning_rate": 1.0035202563142577e-06, + "loss": 0.20082907378673553, + "step": 3496 + }, + { + "epoch": 1.7280365748177438, + "grad_norm": 1.3171397747083256, + "learning_rate": 9.99955848528046e-07, + "loss": 0.23895825445652008, + "step": 3497 + }, + { + "epoch": 1.728530829111578, + "grad_norm": 1.4142591511055072, + "learning_rate": 9.963974490427153e-07, + 
"loss": 0.30089694261550903, + "step": 3498 + }, + { + "epoch": 1.729025083405412, + "grad_norm": 1.4071492496267155, + "learning_rate": 9.928450602338046e-07, + "loss": 0.28134891390800476, + "step": 3499 + }, + { + "epoch": 1.7295193376992462, + "grad_norm": 1.239666390023503, + "learning_rate": 9.892986844728325e-07, + "loss": 0.1947125792503357, + "step": 3500 + }, + { + "epoch": 1.7300135919930804, + "grad_norm": 1.2560350647671819, + "learning_rate": 9.857583241273116e-07, + "loss": 0.252549409866333, + "step": 3501 + }, + { + "epoch": 1.7305078462869146, + "grad_norm": 1.8080125735095465, + "learning_rate": 9.82223981560736e-07, + "loss": 0.28061211109161377, + "step": 3502 + }, + { + "epoch": 1.7310021005807488, + "grad_norm": 1.3465400182463805, + "learning_rate": 9.786956591325813e-07, + "loss": 0.2492327094078064, + "step": 3503 + }, + { + "epoch": 1.731496354874583, + "grad_norm": 1.3114105920039891, + "learning_rate": 9.75173359198307e-07, + "loss": 0.20470373332500458, + "step": 3504 + }, + { + "epoch": 1.7319906091684172, + "grad_norm": 1.4582343704980485, + "learning_rate": 9.716570841093476e-07, + "loss": 0.24190351366996765, + "step": 3505 + }, + { + "epoch": 1.7324848634622514, + "grad_norm": 1.3916465638756335, + "learning_rate": 9.681468362131209e-07, + "loss": 0.28784725069999695, + "step": 3506 + }, + { + "epoch": 1.7329791177560856, + "grad_norm": 1.4872057430892556, + "learning_rate": 9.646426178530176e-07, + "loss": 0.2676560878753662, + "step": 3507 + }, + { + "epoch": 1.7334733720499198, + "grad_norm": 1.4118374661566944, + "learning_rate": 9.611444313684027e-07, + "loss": 0.2493928223848343, + "step": 3508 + }, + { + "epoch": 1.733967626343754, + "grad_norm": 1.272854491876895, + "learning_rate": 9.57652279094613e-07, + "loss": 0.23272472620010376, + "step": 3509 + }, + { + "epoch": 1.734461880637588, + "grad_norm": 1.3295460481124186, + "learning_rate": 9.541661633629662e-07, + "loss": 0.23245804011821747, + "step": 3510 + }, + { + 
"epoch": 1.7349561349314222, + "grad_norm": 1.318916212284511, + "learning_rate": 9.506860865007373e-07, + "loss": 0.22367024421691895, + "step": 3511 + }, + { + "epoch": 1.7354503892252564, + "grad_norm": 1.312738075120818, + "learning_rate": 9.472120508311788e-07, + "loss": 0.22332677245140076, + "step": 3512 + }, + { + "epoch": 1.7359446435190906, + "grad_norm": 1.3669711817276102, + "learning_rate": 9.437440586735081e-07, + "loss": 0.28051453828811646, + "step": 3513 + }, + { + "epoch": 1.7364388978129246, + "grad_norm": 1.5089189064457602, + "learning_rate": 9.402821123429017e-07, + "loss": 0.24815741181373596, + "step": 3514 + }, + { + "epoch": 1.7369331521067588, + "grad_norm": 1.338757796188803, + "learning_rate": 9.368262141505114e-07, + "loss": 0.24077603220939636, + "step": 3515 + }, + { + "epoch": 1.737427406400593, + "grad_norm": 1.419717776508751, + "learning_rate": 9.333763664034457e-07, + "loss": 0.24596062302589417, + "step": 3516 + }, + { + "epoch": 1.7379216606944272, + "grad_norm": 1.3347588363810814, + "learning_rate": 9.299325714047702e-07, + "loss": 0.22939634323120117, + "step": 3517 + }, + { + "epoch": 1.7384159149882614, + "grad_norm": 1.446015117761441, + "learning_rate": 9.264948314535116e-07, + "loss": 0.24870653450489044, + "step": 3518 + }, + { + "epoch": 1.7389101692820956, + "grad_norm": 1.2985600743859553, + "learning_rate": 9.23063148844664e-07, + "loss": 0.24589623510837555, + "step": 3519 + }, + { + "epoch": 1.7394044235759298, + "grad_norm": 1.3138002527909343, + "learning_rate": 9.196375258691615e-07, + "loss": 0.24228474497795105, + "step": 3520 + }, + { + "epoch": 1.739898677869764, + "grad_norm": 1.274631487561465, + "learning_rate": 9.162179648139047e-07, + "loss": 0.24371150135993958, + "step": 3521 + }, + { + "epoch": 1.7403929321635982, + "grad_norm": 1.279720023026326, + "learning_rate": 9.128044679617432e-07, + "loss": 0.24775750935077667, + "step": 3522 + }, + { + "epoch": 1.7408871864574325, + "grad_norm": 
1.5257492514284694, + "learning_rate": 9.093970375914784e-07, + "loss": 0.2893243432044983, + "step": 3523 + }, + { + "epoch": 1.7413814407512667, + "grad_norm": 1.300861064044251, + "learning_rate": 9.059956759778632e-07, + "loss": 0.24014830589294434, + "step": 3524 + }, + { + "epoch": 1.7418756950451009, + "grad_norm": 1.399249837900177, + "learning_rate": 9.026003853915977e-07, + "loss": 0.21439003944396973, + "step": 3525 + }, + { + "epoch": 1.7423699493389349, + "grad_norm": 1.3253623378225632, + "learning_rate": 8.992111680993265e-07, + "loss": 0.23376847803592682, + "step": 3526 + }, + { + "epoch": 1.742864203632769, + "grad_norm": 1.3914877634645069, + "learning_rate": 8.958280263636487e-07, + "loss": 0.244795560836792, + "step": 3527 + }, + { + "epoch": 1.7433584579266033, + "grad_norm": 1.3847661327530765, + "learning_rate": 8.924509624430955e-07, + "loss": 0.2513751685619354, + "step": 3528 + }, + { + "epoch": 1.7438527122204373, + "grad_norm": 1.3808839230401615, + "learning_rate": 8.890799785921478e-07, + "loss": 0.2118893414735794, + "step": 3529 + }, + { + "epoch": 1.7443469665142715, + "grad_norm": 1.4606627623109902, + "learning_rate": 8.857150770612288e-07, + "loss": 0.2834109365940094, + "step": 3530 + }, + { + "epoch": 1.7448412208081057, + "grad_norm": 1.3959930901293698, + "learning_rate": 8.823562600966962e-07, + "loss": 0.2546151876449585, + "step": 3531 + }, + { + "epoch": 1.7453354751019399, + "grad_norm": 1.3410984246991777, + "learning_rate": 8.790035299408494e-07, + "loss": 0.2654607594013214, + "step": 3532 + }, + { + "epoch": 1.745829729395774, + "grad_norm": 1.4773453802832905, + "learning_rate": 8.756568888319239e-07, + "loss": 0.2720295786857605, + "step": 3533 + }, + { + "epoch": 1.7463239836896083, + "grad_norm": 1.3341271298777078, + "learning_rate": 8.723163390040856e-07, + "loss": 0.22259725630283356, + "step": 3534 + }, + { + "epoch": 1.7468182379834425, + "grad_norm": 1.3952830917524783, + "learning_rate": 
8.68981882687443e-07, + "loss": 0.22918277978897095, + "step": 3535 + }, + { + "epoch": 1.7473124922772767, + "grad_norm": 1.4553860122555766, + "learning_rate": 8.656535221080297e-07, + "loss": 0.24396009743213654, + "step": 3536 + }, + { + "epoch": 1.747806746571111, + "grad_norm": 1.4530449395488945, + "learning_rate": 8.623312594878097e-07, + "loss": 0.2370900958776474, + "step": 3537 + }, + { + "epoch": 1.748301000864945, + "grad_norm": 1.4353409191789361, + "learning_rate": 8.590150970446798e-07, + "loss": 0.2785671055316925, + "step": 3538 + }, + { + "epoch": 1.7487952551587793, + "grad_norm": 1.3531168663907844, + "learning_rate": 8.557050369924624e-07, + "loss": 0.29365241527557373, + "step": 3539 + }, + { + "epoch": 1.7492895094526135, + "grad_norm": 1.3579124483240532, + "learning_rate": 8.524010815409068e-07, + "loss": 0.24052876234054565, + "step": 3540 + }, + { + "epoch": 1.7497837637464475, + "grad_norm": 1.5096531715278536, + "learning_rate": 8.49103232895685e-07, + "loss": 0.23938694596290588, + "step": 3541 + }, + { + "epoch": 1.7502780180402817, + "grad_norm": 1.2842245856075563, + "learning_rate": 8.458114932583961e-07, + "loss": 0.2244144231081009, + "step": 3542 + }, + { + "epoch": 1.750772272334116, + "grad_norm": 1.4659940645429403, + "learning_rate": 8.425258648265544e-07, + "loss": 0.25028878450393677, + "step": 3543 + }, + { + "epoch": 1.7512665266279501, + "grad_norm": 1.414718407414415, + "learning_rate": 8.39246349793602e-07, + "loss": 0.23135274648666382, + "step": 3544 + }, + { + "epoch": 1.751760780921784, + "grad_norm": 1.3004631081596045, + "learning_rate": 8.359729503488967e-07, + "loss": 0.23874548077583313, + "step": 3545 + }, + { + "epoch": 1.7522550352156183, + "grad_norm": 1.4912661633646227, + "learning_rate": 8.327056686777102e-07, + "loss": 0.2780659794807434, + "step": 3546 + }, + { + "epoch": 1.7527492895094525, + "grad_norm": 1.3424848463452685, + "learning_rate": 8.294445069612356e-07, + "loss": 0.213335320353508, + 
"step": 3547 + }, + { + "epoch": 1.7532435438032867, + "grad_norm": 1.3764395925344186, + "learning_rate": 8.261894673765757e-07, + "loss": 0.23284730315208435, + "step": 3548 + }, + { + "epoch": 1.753737798097121, + "grad_norm": 1.4152912967440003, + "learning_rate": 8.229405520967504e-07, + "loss": 0.25429633259773254, + "step": 3549 + }, + { + "epoch": 1.7542320523909551, + "grad_norm": 1.42166486412748, + "learning_rate": 8.196977632906877e-07, + "loss": 0.2519379258155823, + "step": 3550 + }, + { + "epoch": 1.7547263066847893, + "grad_norm": 1.3397514660513317, + "learning_rate": 8.164611031232283e-07, + "loss": 0.2510948181152344, + "step": 3551 + }, + { + "epoch": 1.7552205609786236, + "grad_norm": 1.4391737307664527, + "learning_rate": 8.132305737551193e-07, + "loss": 0.27415433526039124, + "step": 3552 + }, + { + "epoch": 1.7557148152724578, + "grad_norm": 1.4503824956137814, + "learning_rate": 8.100061773430179e-07, + "loss": 0.26723912358283997, + "step": 3553 + }, + { + "epoch": 1.756209069566292, + "grad_norm": 1.3305646078685684, + "learning_rate": 8.067879160394821e-07, + "loss": 0.2710701823234558, + "step": 3554 + }, + { + "epoch": 1.7567033238601262, + "grad_norm": 1.2981752509304552, + "learning_rate": 8.035757919929765e-07, + "loss": 0.23247234523296356, + "step": 3555 + }, + { + "epoch": 1.7571975781539602, + "grad_norm": 1.3788336069912301, + "learning_rate": 8.003698073478749e-07, + "loss": 0.2514559328556061, + "step": 3556 + }, + { + "epoch": 1.7576918324477944, + "grad_norm": 1.2669691261364102, + "learning_rate": 7.971699642444419e-07, + "loss": 0.23549199104309082, + "step": 3557 + }, + { + "epoch": 1.7581860867416286, + "grad_norm": 1.326325870924157, + "learning_rate": 7.939762648188476e-07, + "loss": 0.24511446058750153, + "step": 3558 + }, + { + "epoch": 1.7586803410354628, + "grad_norm": 1.244030857989509, + "learning_rate": 7.907887112031609e-07, + "loss": 0.18705075979232788, + "step": 3559 + }, + { + "epoch": 1.7591745953292968, 
+ "grad_norm": 1.3163815425830492, + "learning_rate": 7.876073055253474e-07, + "loss": 0.24297048151493073, + "step": 3560 + }, + { + "epoch": 1.759668849623131, + "grad_norm": 1.3886968971610452, + "learning_rate": 7.844320499092683e-07, + "loss": 0.239119753241539, + "step": 3561 + }, + { + "epoch": 1.7601631039169652, + "grad_norm": 1.3716161630664097, + "learning_rate": 7.81262946474679e-07, + "loss": 0.2430122196674347, + "step": 3562 + }, + { + "epoch": 1.7606573582107994, + "grad_norm": 1.5018987096099226, + "learning_rate": 7.78099997337225e-07, + "loss": 0.2785049378871918, + "step": 3563 + }, + { + "epoch": 1.7611516125046336, + "grad_norm": 1.324774124882076, + "learning_rate": 7.749432046084471e-07, + "loss": 0.2451494038105011, + "step": 3564 + }, + { + "epoch": 1.7616458667984678, + "grad_norm": 1.2759037312949375, + "learning_rate": 7.717925703957785e-07, + "loss": 0.20071648061275482, + "step": 3565 + }, + { + "epoch": 1.762140121092302, + "grad_norm": 1.265455917769001, + "learning_rate": 7.686480968025333e-07, + "loss": 0.22308245301246643, + "step": 3566 + }, + { + "epoch": 1.7626343753861362, + "grad_norm": 1.4753453520092665, + "learning_rate": 7.655097859279192e-07, + "loss": 0.26082009077072144, + "step": 3567 + }, + { + "epoch": 1.7631286296799704, + "grad_norm": 1.2035646972809244, + "learning_rate": 7.623776398670268e-07, + "loss": 0.21026611328125, + "step": 3568 + }, + { + "epoch": 1.7636228839738046, + "grad_norm": 1.3616311603644673, + "learning_rate": 7.592516607108324e-07, + "loss": 0.23878465592861176, + "step": 3569 + }, + { + "epoch": 1.7641171382676388, + "grad_norm": 1.4512524044419246, + "learning_rate": 7.561318505461956e-07, + "loss": 0.30288150906562805, + "step": 3570 + }, + { + "epoch": 1.764611392561473, + "grad_norm": 1.3464088406966324, + "learning_rate": 7.530182114558582e-07, + "loss": 0.25749915838241577, + "step": 3571 + }, + { + "epoch": 1.765105646855307, + "grad_norm": 1.4850779133681176, + "learning_rate": 
7.499107455184351e-07, + "loss": 0.23799163103103638, + "step": 3572 + }, + { + "epoch": 1.7655999011491412, + "grad_norm": 1.2970926183891958, + "learning_rate": 7.46809454808436e-07, + "loss": 0.2626670002937317, + "step": 3573 + }, + { + "epoch": 1.7660941554429754, + "grad_norm": 1.4394447645143165, + "learning_rate": 7.437143413962299e-07, + "loss": 0.23273026943206787, + "step": 3574 + }, + { + "epoch": 1.7665884097368096, + "grad_norm": 1.329151714167698, + "learning_rate": 7.406254073480735e-07, + "loss": 0.22592151165008545, + "step": 3575 + }, + { + "epoch": 1.7670826640306436, + "grad_norm": 1.4000212660765223, + "learning_rate": 7.375426547260944e-07, + "loss": 0.2594859004020691, + "step": 3576 + }, + { + "epoch": 1.7675769183244778, + "grad_norm": 1.2114788921542652, + "learning_rate": 7.344660855882946e-07, + "loss": 0.2161571979522705, + "step": 3577 + }, + { + "epoch": 1.768071172618312, + "grad_norm": 1.2669666342048183, + "learning_rate": 7.313957019885487e-07, + "loss": 0.23052990436553955, + "step": 3578 + }, + { + "epoch": 1.7685654269121462, + "grad_norm": 1.2921856609362714, + "learning_rate": 7.283315059766005e-07, + "loss": 0.2309163510799408, + "step": 3579 + }, + { + "epoch": 1.7690596812059804, + "grad_norm": 1.3800150012724666, + "learning_rate": 7.252734995980604e-07, + "loss": 0.24543863534927368, + "step": 3580 + }, + { + "epoch": 1.7695539354998147, + "grad_norm": 1.315509052214176, + "learning_rate": 7.22221684894413e-07, + "loss": 0.27616050839424133, + "step": 3581 + }, + { + "epoch": 1.7700481897936489, + "grad_norm": 1.5849292816622715, + "learning_rate": 7.191760639030077e-07, + "loss": 0.2247719019651413, + "step": 3582 + }, + { + "epoch": 1.770542444087483, + "grad_norm": 1.3600242028973613, + "learning_rate": 7.161366386570545e-07, + "loss": 0.28721702098846436, + "step": 3583 + }, + { + "epoch": 1.7710366983813173, + "grad_norm": 1.3444976293289765, + "learning_rate": 7.131034111856294e-07, + "loss": 0.24191290140151978, 
+ "step": 3584 + }, + { + "epoch": 1.7715309526751515, + "grad_norm": 1.3549546462173616, + "learning_rate": 7.100763835136748e-07, + "loss": 0.24049970507621765, + "step": 3585 + }, + { + "epoch": 1.7720252069689857, + "grad_norm": 1.4855378384649431, + "learning_rate": 7.070555576619887e-07, + "loss": 0.255404531955719, + "step": 3586 + }, + { + "epoch": 1.7725194612628197, + "grad_norm": 1.3672964019576628, + "learning_rate": 7.040409356472333e-07, + "loss": 0.23041129112243652, + "step": 3587 + }, + { + "epoch": 1.7730137155566539, + "grad_norm": 1.3790812567511086, + "learning_rate": 7.010325194819278e-07, + "loss": 0.2589847147464752, + "step": 3588 + }, + { + "epoch": 1.773507969850488, + "grad_norm": 1.4114272066031652, + "learning_rate": 6.980303111744424e-07, + "loss": 0.2604563236236572, + "step": 3589 + }, + { + "epoch": 1.7740022241443223, + "grad_norm": 1.3786249354000182, + "learning_rate": 6.950343127290138e-07, + "loss": 0.26831385493278503, + "step": 3590 + }, + { + "epoch": 1.7744964784381563, + "grad_norm": 1.3398044201914234, + "learning_rate": 6.920445261457276e-07, + "loss": 0.20475032925605774, + "step": 3591 + }, + { + "epoch": 1.7749907327319905, + "grad_norm": 1.669693479578031, + "learning_rate": 6.890609534205206e-07, + "loss": 0.32378682494163513, + "step": 3592 + }, + { + "epoch": 1.7754849870258247, + "grad_norm": 1.4433175991642826, + "learning_rate": 6.86083596545184e-07, + "loss": 0.2526070177555084, + "step": 3593 + }, + { + "epoch": 1.775979241319659, + "grad_norm": 1.3738645357999373, + "learning_rate": 6.831124575073578e-07, + "loss": 0.2467537820339203, + "step": 3594 + }, + { + "epoch": 1.776473495613493, + "grad_norm": 1.4660741149631984, + "learning_rate": 6.801475382905332e-07, + "loss": 0.2857215404510498, + "step": 3595 + }, + { + "epoch": 1.7769677499073273, + "grad_norm": 1.4443968381596262, + "learning_rate": 6.771888408740479e-07, + "loss": 0.23615087568759918, + "step": 3596 + }, + { + "epoch": 1.7774620042011615, 
+ "grad_norm": 1.451390021672748, + "learning_rate": 6.742363672330854e-07, + "loss": 0.2613365054130554, + "step": 3597 + }, + { + "epoch": 1.7779562584949957, + "grad_norm": 1.465141872886975, + "learning_rate": 6.712901193386756e-07, + "loss": 0.2558417320251465, + "step": 3598 + }, + { + "epoch": 1.77845051278883, + "grad_norm": 1.4467371641088191, + "learning_rate": 6.683500991576919e-07, + "loss": 0.2683117091655731, + "step": 3599 + }, + { + "epoch": 1.7789447670826641, + "grad_norm": 1.4625204738144366, + "learning_rate": 6.654163086528487e-07, + "loss": 0.2546064555644989, + "step": 3600 + }, + { + "epoch": 1.7794390213764983, + "grad_norm": 1.5872307428555623, + "learning_rate": 6.624887497827004e-07, + "loss": 0.2683906555175781, + "step": 3601 + }, + { + "epoch": 1.7799332756703325, + "grad_norm": 1.363900663564542, + "learning_rate": 6.595674245016492e-07, + "loss": 0.23260846734046936, + "step": 3602 + }, + { + "epoch": 1.7804275299641665, + "grad_norm": 1.3840728964244504, + "learning_rate": 6.566523347599252e-07, + "loss": 0.22884608805179596, + "step": 3603 + }, + { + "epoch": 1.7809217842580007, + "grad_norm": 1.3583647776279095, + "learning_rate": 6.537434825036027e-07, + "loss": 0.24236485362052917, + "step": 3604 + }, + { + "epoch": 1.781416038551835, + "grad_norm": 1.4869775379128283, + "learning_rate": 6.508408696745893e-07, + "loss": 0.29543957114219666, + "step": 3605 + }, + { + "epoch": 1.781910292845669, + "grad_norm": 1.3626399619539873, + "learning_rate": 6.479444982106276e-07, + "loss": 0.24011383950710297, + "step": 3606 + }, + { + "epoch": 1.7824045471395031, + "grad_norm": 1.3135116984072812, + "learning_rate": 6.450543700452949e-07, + "loss": 0.248407244682312, + "step": 3607 + }, + { + "epoch": 1.7828988014333373, + "grad_norm": 1.4089475770026854, + "learning_rate": 6.421704871080004e-07, + "loss": 0.2405746728181839, + "step": 3608 + }, + { + "epoch": 1.7833930557271716, + "grad_norm": 1.2522903384339197, + "learning_rate": 
6.392928513239804e-07, + "loss": 0.24601790308952332, + "step": 3609 + }, + { + "epoch": 1.7838873100210058, + "grad_norm": 1.2436557177887422, + "learning_rate": 6.36421464614303e-07, + "loss": 0.20030242204666138, + "step": 3610 + }, + { + "epoch": 1.78438156431484, + "grad_norm": 1.3296983724782687, + "learning_rate": 6.335563288958691e-07, + "loss": 0.23858311772346497, + "step": 3611 + }, + { + "epoch": 1.7848758186086742, + "grad_norm": 1.4392435044249465, + "learning_rate": 6.306974460813986e-07, + "loss": 0.2330242097377777, + "step": 3612 + }, + { + "epoch": 1.7853700729025084, + "grad_norm": 1.445863340067418, + "learning_rate": 6.278448180794416e-07, + "loss": 0.25513261556625366, + "step": 3613 + }, + { + "epoch": 1.7858643271963426, + "grad_norm": 1.3248647587522469, + "learning_rate": 6.249984467943737e-07, + "loss": 0.2298405021429062, + "step": 3614 + }, + { + "epoch": 1.7863585814901768, + "grad_norm": 1.3090685428520892, + "learning_rate": 6.221583341263893e-07, + "loss": 0.22120623290538788, + "step": 3615 + }, + { + "epoch": 1.786852835784011, + "grad_norm": 1.3392765156774626, + "learning_rate": 6.193244819715072e-07, + "loss": 0.26976969838142395, + "step": 3616 + }, + { + "epoch": 1.7873470900778452, + "grad_norm": 1.3657180436845977, + "learning_rate": 6.164968922215697e-07, + "loss": 0.24354586005210876, + "step": 3617 + }, + { + "epoch": 1.7878413443716792, + "grad_norm": 1.4254233164600292, + "learning_rate": 6.136755667642302e-07, + "loss": 0.2849498689174652, + "step": 3618 + }, + { + "epoch": 1.7883355986655134, + "grad_norm": 1.2708453781613391, + "learning_rate": 6.10860507482971e-07, + "loss": 0.2431584596633911, + "step": 3619 + }, + { + "epoch": 1.7888298529593476, + "grad_norm": 1.5031154285158648, + "learning_rate": 6.080517162570809e-07, + "loss": 0.2384781688451767, + "step": 3620 + }, + { + "epoch": 1.7893241072531818, + "grad_norm": 1.45686854578023, + "learning_rate": 6.052491949616712e-07, + "loss": 0.23782339692115784, + 
"step": 3621 + }, + { + "epoch": 1.7898183615470158, + "grad_norm": 1.342733882676876, + "learning_rate": 6.024529454676631e-07, + "loss": 0.23293447494506836, + "step": 3622 + }, + { + "epoch": 1.79031261584085, + "grad_norm": 1.2930495337650696, + "learning_rate": 5.996629696417955e-07, + "loss": 0.21202662587165833, + "step": 3623 + }, + { + "epoch": 1.7908068701346842, + "grad_norm": 1.5889243123202152, + "learning_rate": 5.968792693466141e-07, + "loss": 0.27971768379211426, + "step": 3624 + }, + { + "epoch": 1.7913011244285184, + "grad_norm": 1.441999540970622, + "learning_rate": 5.94101846440478e-07, + "loss": 0.2433638721704483, + "step": 3625 + }, + { + "epoch": 1.7917953787223526, + "grad_norm": 1.3682285780053611, + "learning_rate": 5.91330702777555e-07, + "loss": 0.21812602877616882, + "step": 3626 + }, + { + "epoch": 1.7922896330161868, + "grad_norm": 1.924541384200403, + "learning_rate": 5.88565840207822e-07, + "loss": 0.2135028839111328, + "step": 3627 + }, + { + "epoch": 1.792783887310021, + "grad_norm": 1.3226125497456243, + "learning_rate": 5.858072605770626e-07, + "loss": 0.23919226229190826, + "step": 3628 + }, + { + "epoch": 1.7932781416038552, + "grad_norm": 1.3008122554752455, + "learning_rate": 5.830549657268614e-07, + "loss": 0.2495008111000061, + "step": 3629 + }, + { + "epoch": 1.7937723958976894, + "grad_norm": 1.4679589100669386, + "learning_rate": 5.80308957494613e-07, + "loss": 0.2531805634498596, + "step": 3630 + }, + { + "epoch": 1.7942666501915236, + "grad_norm": 1.2654762717037664, + "learning_rate": 5.775692377135156e-07, + "loss": 0.22644619643688202, + "step": 3631 + }, + { + "epoch": 1.7947609044853579, + "grad_norm": 1.2567004368149646, + "learning_rate": 5.748358082125638e-07, + "loss": 0.2264411598443985, + "step": 3632 + }, + { + "epoch": 1.7952551587791918, + "grad_norm": 1.3206987713043599, + "learning_rate": 5.721086708165568e-07, + "loss": 0.2663921117782593, + "step": 3633 + }, + { + "epoch": 1.795749413073026, + 
"grad_norm": 1.35703763331278, + "learning_rate": 5.693878273460951e-07, + "loss": 0.2398051619529724, + "step": 3634 + }, + { + "epoch": 1.7962436673668603, + "grad_norm": 1.4184943078470147, + "learning_rate": 5.6667327961757e-07, + "loss": 0.28781580924987793, + "step": 3635 + }, + { + "epoch": 1.7967379216606945, + "grad_norm": 2.1761368991988084, + "learning_rate": 5.639650294431787e-07, + "loss": 0.2232055813074112, + "step": 3636 + }, + { + "epoch": 1.7972321759545284, + "grad_norm": 1.402577073030083, + "learning_rate": 5.612630786309103e-07, + "loss": 0.23214340209960938, + "step": 3637 + }, + { + "epoch": 1.7977264302483627, + "grad_norm": 1.2714718799747338, + "learning_rate": 5.585674289845467e-07, + "loss": 0.21598659455776215, + "step": 3638 + }, + { + "epoch": 1.7982206845421969, + "grad_norm": 1.351029180109128, + "learning_rate": 5.558780823036658e-07, + "loss": 0.2760176956653595, + "step": 3639 + }, + { + "epoch": 1.798714938836031, + "grad_norm": 1.3941723061811673, + "learning_rate": 5.531950403836373e-07, + "loss": 0.2641429901123047, + "step": 3640 + }, + { + "epoch": 1.7992091931298653, + "grad_norm": 1.390874465362023, + "learning_rate": 5.505183050156204e-07, + "loss": 0.2407502382993698, + "step": 3641 + }, + { + "epoch": 1.7997034474236995, + "grad_norm": 1.2164247841450622, + "learning_rate": 5.478478779865682e-07, + "loss": 0.19910940527915955, + "step": 3642 + }, + { + "epoch": 1.8001977017175337, + "grad_norm": 1.4412656091937792, + "learning_rate": 5.451837610792166e-07, + "loss": 0.2716234624385834, + "step": 3643 + }, + { + "epoch": 1.800691956011368, + "grad_norm": 1.3284477963142056, + "learning_rate": 5.42525956072093e-07, + "loss": 0.2784198224544525, + "step": 3644 + }, + { + "epoch": 1.801186210305202, + "grad_norm": 1.3444314874013155, + "learning_rate": 5.398744647395104e-07, + "loss": 0.2277904599905014, + "step": 3645 + }, + { + "epoch": 1.8016804645990363, + "grad_norm": 1.4299842617414134, + "learning_rate": 
5.372292888515684e-07, + "loss": 0.26788002252578735, + "step": 3646 + }, + { + "epoch": 1.8021747188928705, + "grad_norm": 1.3607541160674654, + "learning_rate": 5.345904301741445e-07, + "loss": 0.22452175617218018, + "step": 3647 + }, + { + "epoch": 1.8026689731867047, + "grad_norm": 1.44450101040719, + "learning_rate": 5.319578904689071e-07, + "loss": 0.2337179332971573, + "step": 3648 + }, + { + "epoch": 1.8031632274805387, + "grad_norm": 1.3116281040368842, + "learning_rate": 5.293316714932983e-07, + "loss": 0.2614130973815918, + "step": 3649 + }, + { + "epoch": 1.803657481774373, + "grad_norm": 1.3142722561763884, + "learning_rate": 5.267117750005468e-07, + "loss": 0.2577320635318756, + "step": 3650 + }, + { + "epoch": 1.8041517360682071, + "grad_norm": 1.231846526151871, + "learning_rate": 5.24098202739658e-07, + "loss": 0.2058672308921814, + "step": 3651 + }, + { + "epoch": 1.8046459903620413, + "grad_norm": 1.3970882237865128, + "learning_rate": 5.214909564554138e-07, + "loss": 0.25223514437675476, + "step": 3652 + }, + { + "epoch": 1.8051402446558753, + "grad_norm": 1.3683940041570406, + "learning_rate": 5.188900378883765e-07, + "loss": 0.25651872158050537, + "step": 3653 + }, + { + "epoch": 1.8056344989497095, + "grad_norm": 1.3167902113360206, + "learning_rate": 5.162954487748828e-07, + "loss": 0.257855623960495, + "step": 3654 + }, + { + "epoch": 1.8061287532435437, + "grad_norm": 1.3408137381423195, + "learning_rate": 5.137071908470381e-07, + "loss": 0.22942093014717102, + "step": 3655 + }, + { + "epoch": 1.806623007537378, + "grad_norm": 1.3905585042591802, + "learning_rate": 5.111252658327326e-07, + "loss": 0.25629153847694397, + "step": 3656 + }, + { + "epoch": 1.8071172618312121, + "grad_norm": 1.3417957205977868, + "learning_rate": 5.085496754556207e-07, + "loss": 0.23882299661636353, + "step": 3657 + }, + { + "epoch": 1.8076115161250463, + "grad_norm": 1.3092883951034957, + "learning_rate": 5.059804214351283e-07, + "loss": 0.2323160469532013, + 
"step": 3658 + }, + { + "epoch": 1.8081057704188805, + "grad_norm": 1.318607555394289, + "learning_rate": 5.034175054864531e-07, + "loss": 0.2080869972705841, + "step": 3659 + }, + { + "epoch": 1.8086000247127147, + "grad_norm": 1.476319660825777, + "learning_rate": 5.008609293205624e-07, + "loss": 0.22439511120319366, + "step": 3660 + }, + { + "epoch": 1.809094279006549, + "grad_norm": 1.3639928518895943, + "learning_rate": 4.983106946441885e-07, + "loss": 0.2527809739112854, + "step": 3661 + }, + { + "epoch": 1.8095885333003832, + "grad_norm": 1.181172468164539, + "learning_rate": 4.957668031598328e-07, + "loss": 0.2149294763803482, + "step": 3662 + }, + { + "epoch": 1.8100827875942174, + "grad_norm": 1.3244234520799762, + "learning_rate": 4.932292565657615e-07, + "loss": 0.2471565306186676, + "step": 3663 + }, + { + "epoch": 1.8105770418880514, + "grad_norm": 1.328701941509414, + "learning_rate": 4.906980565560004e-07, + "loss": 0.25820282101631165, + "step": 3664 + }, + { + "epoch": 1.8110712961818856, + "grad_norm": 1.4538113944792308, + "learning_rate": 4.881732048203469e-07, + "loss": 0.2815645933151245, + "step": 3665 + }, + { + "epoch": 1.8115655504757198, + "grad_norm": 1.4078938194960222, + "learning_rate": 4.856547030443559e-07, + "loss": 0.23443330824375153, + "step": 3666 + }, + { + "epoch": 1.812059804769554, + "grad_norm": 1.413689966723704, + "learning_rate": 4.831425529093403e-07, + "loss": 0.2452373206615448, + "step": 3667 + }, + { + "epoch": 1.812554059063388, + "grad_norm": 1.2405057526282826, + "learning_rate": 4.806367560923764e-07, + "loss": 0.21815839409828186, + "step": 3668 + }, + { + "epoch": 1.8130483133572222, + "grad_norm": 1.3418751770168684, + "learning_rate": 4.781373142663003e-07, + "loss": 0.23436316847801208, + "step": 3669 + }, + { + "epoch": 1.8135425676510564, + "grad_norm": 1.277189547676361, + "learning_rate": 4.75644229099701e-07, + "loss": 0.18917132914066315, + "step": 3670 + }, + { + "epoch": 1.8140368219448906, + 
"grad_norm": 1.3842801505047626, + "learning_rate": 4.7315750225692905e-07, + "loss": 0.24570351839065552, + "step": 3671 + }, + { + "epoch": 1.8145310762387248, + "grad_norm": 1.2514343072057177, + "learning_rate": 4.7067713539808543e-07, + "loss": 0.23367956280708313, + "step": 3672 + }, + { + "epoch": 1.815025330532559, + "grad_norm": 1.372723501995688, + "learning_rate": 4.682031301790291e-07, + "loss": 0.24563322961330414, + "step": 3673 + }, + { + "epoch": 1.8155195848263932, + "grad_norm": 1.3552399849082646, + "learning_rate": 4.6573548825137204e-07, + "loss": 0.2425815761089325, + "step": 3674 + }, + { + "epoch": 1.8160138391202274, + "grad_norm": 1.2732667032266225, + "learning_rate": 4.632742112624744e-07, + "loss": 0.2173803597688675, + "step": 3675 + }, + { + "epoch": 1.8165080934140616, + "grad_norm": 1.4674070434763509, + "learning_rate": 4.6081930085544734e-07, + "loss": 0.2665477395057678, + "step": 3676 + }, + { + "epoch": 1.8170023477078958, + "grad_norm": 1.2335396057121188, + "learning_rate": 4.5837075866915994e-07, + "loss": 0.23834756016731262, + "step": 3677 + }, + { + "epoch": 1.81749660200173, + "grad_norm": 1.3614176095599289, + "learning_rate": 4.55928586338219e-07, + "loss": 0.2479294240474701, + "step": 3678 + }, + { + "epoch": 1.8179908562955642, + "grad_norm": 1.370567608566195, + "learning_rate": 4.5349278549298716e-07, + "loss": 0.24136531352996826, + "step": 3679 + }, + { + "epoch": 1.8184851105893982, + "grad_norm": 1.3881148070094378, + "learning_rate": 4.510633577595669e-07, + "loss": 0.24397623538970947, + "step": 3680 + }, + { + "epoch": 1.8189793648832324, + "grad_norm": 1.3189259944629108, + "learning_rate": 4.48640304759812e-07, + "loss": 0.27078694105148315, + "step": 3681 + }, + { + "epoch": 1.8194736191770666, + "grad_norm": 1.5222352072420349, + "learning_rate": 4.4622362811131745e-07, + "loss": 0.2544251084327698, + "step": 3682 + }, + { + "epoch": 1.8199678734709008, + "grad_norm": 1.3696668102162666, + 
"learning_rate": 4.4381332942742384e-07, + "loss": 0.2528873682022095, + "step": 3683 + }, + { + "epoch": 1.8204621277647348, + "grad_norm": 1.470119432024013, + "learning_rate": 4.414094103172084e-07, + "loss": 0.25487592816352844, + "step": 3684 + }, + { + "epoch": 1.820956382058569, + "grad_norm": 1.3872878168023053, + "learning_rate": 4.3901187238549414e-07, + "loss": 0.22061187028884888, + "step": 3685 + }, + { + "epoch": 1.8214506363524032, + "grad_norm": 1.355863796177502, + "learning_rate": 4.366207172328452e-07, + "loss": 0.2793615758419037, + "step": 3686 + }, + { + "epoch": 1.8219448906462374, + "grad_norm": 1.2429295933181803, + "learning_rate": 4.342359464555612e-07, + "loss": 0.2323140949010849, + "step": 3687 + }, + { + "epoch": 1.8224391449400716, + "grad_norm": 1.370663497944958, + "learning_rate": 4.3185756164568104e-07, + "loss": 0.2616409659385681, + "step": 3688 + }, + { + "epoch": 1.8229333992339058, + "grad_norm": 1.3843956978002738, + "learning_rate": 4.294855643909812e-07, + "loss": 0.203874871134758, + "step": 3689 + }, + { + "epoch": 1.82342765352774, + "grad_norm": 1.2289114807067458, + "learning_rate": 4.271199562749717e-07, + "loss": 0.2272878736257553, + "step": 3690 + }, + { + "epoch": 1.8239219078215743, + "grad_norm": 1.338434972419624, + "learning_rate": 4.247607388769004e-07, + "loss": 0.23728047311306, + "step": 3691 + }, + { + "epoch": 1.8244161621154085, + "grad_norm": 1.4750745226923418, + "learning_rate": 4.2240791377174737e-07, + "loss": 0.2570911943912506, + "step": 3692 + }, + { + "epoch": 1.8249104164092427, + "grad_norm": 1.4969254471055817, + "learning_rate": 4.200614825302207e-07, + "loss": 0.24265727400779724, + "step": 3693 + }, + { + "epoch": 1.8254046707030769, + "grad_norm": 1.405819385173928, + "learning_rate": 4.177214467187707e-07, + "loss": 0.24822816252708435, + "step": 3694 + }, + { + "epoch": 1.8258989249969109, + "grad_norm": 1.3218266218091017, + "learning_rate": 4.153878078995677e-07, + "loss": 
0.23382046818733215, + "step": 3695 + }, + { + "epoch": 1.826393179290745, + "grad_norm": 1.4037010093048616, + "learning_rate": 4.130605676305166e-07, + "loss": 0.27590304613113403, + "step": 3696 + }, + { + "epoch": 1.8268874335845793, + "grad_norm": 1.4161501438852775, + "learning_rate": 4.1073972746525026e-07, + "loss": 0.25702038407325745, + "step": 3697 + }, + { + "epoch": 1.8273816878784135, + "grad_norm": 1.488627338365754, + "learning_rate": 4.0842528895312707e-07, + "loss": 0.28980135917663574, + "step": 3698 + }, + { + "epoch": 1.8278759421722475, + "grad_norm": 1.5075437506896323, + "learning_rate": 4.0611725363923435e-07, + "loss": 0.22739271819591522, + "step": 3699 + }, + { + "epoch": 1.8283701964660817, + "grad_norm": 1.4671495030162094, + "learning_rate": 4.038156230643853e-07, + "loss": 0.26396334171295166, + "step": 3700 + }, + { + "epoch": 1.8288644507599159, + "grad_norm": 1.5855861974203058, + "learning_rate": 4.015203987651106e-07, + "loss": 0.25548964738845825, + "step": 3701 + }, + { + "epoch": 1.82935870505375, + "grad_norm": 1.3315259515817186, + "learning_rate": 3.992315822736725e-07, + "loss": 0.22227105498313904, + "step": 3702 + }, + { + "epoch": 1.8298529593475843, + "grad_norm": 1.445413897274288, + "learning_rate": 3.969491751180543e-07, + "loss": 0.30854254961013794, + "step": 3703 + }, + { + "epoch": 1.8303472136414185, + "grad_norm": 1.4678349464130562, + "learning_rate": 3.946731788219538e-07, + "loss": 0.27471429109573364, + "step": 3704 + }, + { + "epoch": 1.8308414679352527, + "grad_norm": 1.334822235698922, + "learning_rate": 3.924035949047955e-07, + "loss": 0.2317768633365631, + "step": 3705 + }, + { + "epoch": 1.831335722229087, + "grad_norm": 1.4197098897896443, + "learning_rate": 3.901404248817231e-07, + "loss": 0.2450723946094513, + "step": 3706 + }, + { + "epoch": 1.8318299765229211, + "grad_norm": 1.4676009490842072, + "learning_rate": 3.878836702635935e-07, + "loss": 0.2428039014339447, + "step": 3707 + }, + { + 
"epoch": 1.8323242308167553, + "grad_norm": 1.4376208196933993, + "learning_rate": 3.856333325569861e-07, + "loss": 0.27869629859924316, + "step": 3708 + }, + { + "epoch": 1.8328184851105895, + "grad_norm": 1.2808253694997749, + "learning_rate": 3.8338941326419353e-07, + "loss": 0.21661749482154846, + "step": 3709 + }, + { + "epoch": 1.8333127394044237, + "grad_norm": 1.3452610575891626, + "learning_rate": 3.8115191388322206e-07, + "loss": 0.2655249834060669, + "step": 3710 + }, + { + "epoch": 1.8338069936982577, + "grad_norm": 1.3643896556477109, + "learning_rate": 3.7892083590779784e-07, + "loss": 0.2281903475522995, + "step": 3711 + }, + { + "epoch": 1.834301247992092, + "grad_norm": 1.492937654145658, + "learning_rate": 3.7669618082735504e-07, + "loss": 0.24545446038246155, + "step": 3712 + }, + { + "epoch": 1.8347955022859261, + "grad_norm": 1.2788794377367898, + "learning_rate": 3.7447795012704237e-07, + "loss": 0.24749556183815002, + "step": 3713 + }, + { + "epoch": 1.8352897565797601, + "grad_norm": 1.4606135919595513, + "learning_rate": 3.722661452877163e-07, + "loss": 0.26234689354896545, + "step": 3714 + }, + { + "epoch": 1.8357840108735943, + "grad_norm": 1.3697239858165842, + "learning_rate": 3.700607677859491e-07, + "loss": 0.21348389983177185, + "step": 3715 + }, + { + "epoch": 1.8362782651674285, + "grad_norm": 1.3198403259649356, + "learning_rate": 3.6786181909401864e-07, + "loss": 0.2527744770050049, + "step": 3716 + }, + { + "epoch": 1.8367725194612627, + "grad_norm": 1.3153305717810528, + "learning_rate": 3.6566930067991056e-07, + "loss": 0.2175026535987854, + "step": 3717 + }, + { + "epoch": 1.837266773755097, + "grad_norm": 1.3795015677920492, + "learning_rate": 3.6348321400731967e-07, + "loss": 0.2847272753715515, + "step": 3718 + }, + { + "epoch": 1.8377610280489312, + "grad_norm": 1.4885049894439106, + "learning_rate": 3.613035605356463e-07, + "loss": 0.2549072504043579, + "step": 3719 + }, + { + "epoch": 1.8382552823427654, + "grad_norm": 
1.3444222427486383, + "learning_rate": 3.591303417199965e-07, + "loss": 0.24534013867378235, + "step": 3720 + }, + { + "epoch": 1.8387495366365996, + "grad_norm": 1.461602538702394, + "learning_rate": 3.5696355901117865e-07, + "loss": 0.25336408615112305, + "step": 3721 + }, + { + "epoch": 1.8392437909304338, + "grad_norm": 1.4932038589381658, + "learning_rate": 3.548032138557056e-07, + "loss": 0.2787632346153259, + "step": 3722 + }, + { + "epoch": 1.839738045224268, + "grad_norm": 1.3687827308256, + "learning_rate": 3.5264930769579595e-07, + "loss": 0.22364875674247742, + "step": 3723 + }, + { + "epoch": 1.8402322995181022, + "grad_norm": 1.509493433022075, + "learning_rate": 3.5050184196936285e-07, + "loss": 0.2526230216026306, + "step": 3724 + }, + { + "epoch": 1.8407265538119364, + "grad_norm": 1.449998297788816, + "learning_rate": 3.483608181100262e-07, + "loss": 0.2412932962179184, + "step": 3725 + }, + { + "epoch": 1.8412208081057704, + "grad_norm": 1.4100243345912178, + "learning_rate": 3.462262375471026e-07, + "loss": 0.28693705797195435, + "step": 3726 + }, + { + "epoch": 1.8417150623996046, + "grad_norm": 1.4369299703462226, + "learning_rate": 3.4409810170560667e-07, + "loss": 0.2600281834602356, + "step": 3727 + }, + { + "epoch": 1.8422093166934388, + "grad_norm": 1.3702328145360616, + "learning_rate": 3.4197641200625185e-07, + "loss": 0.24885150790214539, + "step": 3728 + }, + { + "epoch": 1.842703570987273, + "grad_norm": 1.476451776245579, + "learning_rate": 3.398611698654497e-07, + "loss": 0.27185115218162537, + "step": 3729 + }, + { + "epoch": 1.843197825281107, + "grad_norm": 1.6779196665373166, + "learning_rate": 3.377523766953006e-07, + "loss": 0.2999323010444641, + "step": 3730 + }, + { + "epoch": 1.8436920795749412, + "grad_norm": 1.3755033406487114, + "learning_rate": 3.356500339036106e-07, + "loss": 0.22807806730270386, + "step": 3731 + }, + { + "epoch": 1.8441863338687754, + "grad_norm": 1.4727836521575108, + "learning_rate": 
3.3355414289387155e-07, + "loss": 0.23006726801395416, + "step": 3732 + }, + { + "epoch": 1.8446805881626096, + "grad_norm": 1.4892072813513704, + "learning_rate": 3.314647050652686e-07, + "loss": 0.25261276960372925, + "step": 3733 + }, + { + "epoch": 1.8451748424564438, + "grad_norm": 1.3741598151970273, + "learning_rate": 3.293817218126827e-07, + "loss": 0.2484148144721985, + "step": 3734 + }, + { + "epoch": 1.845669096750278, + "grad_norm": 1.2679669997107472, + "learning_rate": 3.273051945266836e-07, + "loss": 0.2472834438085556, + "step": 3735 + }, + { + "epoch": 1.8461633510441122, + "grad_norm": 1.16756829401485, + "learning_rate": 3.2523512459352923e-07, + "loss": 0.20510706305503845, + "step": 3736 + }, + { + "epoch": 1.8466576053379464, + "grad_norm": 1.292644423038628, + "learning_rate": 3.231715133951707e-07, + "loss": 0.2331993281841278, + "step": 3737 + }, + { + "epoch": 1.8471518596317806, + "grad_norm": 1.4584815860954135, + "learning_rate": 3.211143623092461e-07, + "loss": 0.2704228162765503, + "step": 3738 + }, + { + "epoch": 1.8476461139256148, + "grad_norm": 1.4579018041488718, + "learning_rate": 3.190636727090768e-07, + "loss": 0.2514714002609253, + "step": 3739 + }, + { + "epoch": 1.848140368219449, + "grad_norm": 1.258977256920419, + "learning_rate": 3.170194459636777e-07, + "loss": 0.2396089732646942, + "step": 3740 + }, + { + "epoch": 1.848634622513283, + "grad_norm": 1.4139144003983488, + "learning_rate": 3.149816834377428e-07, + "loss": 0.266484797000885, + "step": 3741 + }, + { + "epoch": 1.8491288768071172, + "grad_norm": 1.338105672337281, + "learning_rate": 3.129503864916539e-07, + "loss": 0.24549749493598938, + "step": 3742 + }, + { + "epoch": 1.8496231311009514, + "grad_norm": 1.6902480251834826, + "learning_rate": 3.1092555648147615e-07, + "loss": 0.2659090757369995, + "step": 3743 + }, + { + "epoch": 1.8501173853947857, + "grad_norm": 1.4018081288366548, + "learning_rate": 3.0890719475895615e-07, + "loss": 0.2756732702255249, + 
"step": 3744 + }, + { + "epoch": 1.8506116396886196, + "grad_norm": 1.3509953718874834, + "learning_rate": 3.068953026715238e-07, + "loss": 0.2568710148334503, + "step": 3745 + }, + { + "epoch": 1.8511058939824538, + "grad_norm": 1.3512798325752944, + "learning_rate": 3.048898815622914e-07, + "loss": 0.2255566120147705, + "step": 3746 + }, + { + "epoch": 1.851600148276288, + "grad_norm": 1.309385732750396, + "learning_rate": 3.028909327700458e-07, + "loss": 0.2083941102027893, + "step": 3747 + }, + { + "epoch": 1.8520944025701223, + "grad_norm": 1.2287507621351796, + "learning_rate": 3.0089845762926063e-07, + "loss": 0.20739290118217468, + "step": 3748 + }, + { + "epoch": 1.8525886568639565, + "grad_norm": 1.2356251229389228, + "learning_rate": 2.989124574700819e-07, + "loss": 0.21835210919380188, + "step": 3749 + }, + { + "epoch": 1.8530829111577907, + "grad_norm": 1.312598409351232, + "learning_rate": 2.969329336183335e-07, + "loss": 0.2170596569776535, + "step": 3750 + }, + { + "epoch": 1.8535771654516249, + "grad_norm": 1.3990932569701935, + "learning_rate": 2.949598873955184e-07, + "loss": 0.23584111034870148, + "step": 3751 + }, + { + "epoch": 1.854071419745459, + "grad_norm": 1.5531646127161125, + "learning_rate": 2.9299332011881623e-07, + "loss": 0.2690342664718628, + "step": 3752 + }, + { + "epoch": 1.8545656740392933, + "grad_norm": 1.2634424740078676, + "learning_rate": 2.9103323310107566e-07, + "loss": 0.2499091923236847, + "step": 3753 + }, + { + "epoch": 1.8550599283331275, + "grad_norm": 1.417744173198578, + "learning_rate": 2.8907962765082567e-07, + "loss": 0.23112377524375916, + "step": 3754 + }, + { + "epoch": 1.8555541826269617, + "grad_norm": 1.375590332914505, + "learning_rate": 2.8713250507226285e-07, + "loss": 0.25203657150268555, + "step": 3755 + }, + { + "epoch": 1.856048436920796, + "grad_norm": 1.4015552448571456, + "learning_rate": 2.8519186666526086e-07, + "loss": 0.2468508780002594, + "step": 3756 + }, + { + "epoch": 1.85654269121463, 
+ "grad_norm": 1.427563584784084, + "learning_rate": 2.8325771372536e-07, + "loss": 0.22745928168296814, + "step": 3757 + }, + { + "epoch": 1.857036945508464, + "grad_norm": 1.2932963376428803, + "learning_rate": 2.8133004754377525e-07, + "loss": 0.23090660572052002, + "step": 3758 + }, + { + "epoch": 1.8575311998022983, + "grad_norm": 1.420318152152914, + "learning_rate": 2.7940886940738707e-07, + "loss": 0.27513352036476135, + "step": 3759 + }, + { + "epoch": 1.8580254540961325, + "grad_norm": 1.4517333399175874, + "learning_rate": 2.774941805987474e-07, + "loss": 0.25791019201278687, + "step": 3760 + }, + { + "epoch": 1.8585197083899665, + "grad_norm": 1.523404531013776, + "learning_rate": 2.75585982396076e-07, + "loss": 0.2703961730003357, + "step": 3761 + }, + { + "epoch": 1.8590139626838007, + "grad_norm": 1.4198437134006967, + "learning_rate": 2.736842760732561e-07, + "loss": 0.2557608485221863, + "step": 3762 + }, + { + "epoch": 1.859508216977635, + "grad_norm": 1.4276231211370918, + "learning_rate": 2.717890628998421e-07, + "loss": 0.26276740431785583, + "step": 3763 + }, + { + "epoch": 1.8600024712714691, + "grad_norm": 1.3830597360775128, + "learning_rate": 2.699003441410508e-07, + "loss": 0.3033446967601776, + "step": 3764 + }, + { + "epoch": 1.8604967255653033, + "grad_norm": 1.3975518004533982, + "learning_rate": 2.680181210577637e-07, + "loss": 0.2513597905635834, + "step": 3765 + }, + { + "epoch": 1.8609909798591375, + "grad_norm": 1.2527716887935596, + "learning_rate": 2.661423949065267e-07, + "loss": 0.22935059666633606, + "step": 3766 + }, + { + "epoch": 1.8614852341529717, + "grad_norm": 1.5028347517247218, + "learning_rate": 2.6427316693954596e-07, + "loss": 0.2585369348526001, + "step": 3767 + }, + { + "epoch": 1.861979488446806, + "grad_norm": 1.4129565265857094, + "learning_rate": 2.6241043840469104e-07, + "loss": 0.25701645016670227, + "step": 3768 + }, + { + "epoch": 1.8624737427406401, + "grad_norm": 1.304405538262163, + "learning_rate": 
2.605542105454961e-07, + "loss": 0.24622182548046112, + "step": 3769 + }, + { + "epoch": 1.8629679970344744, + "grad_norm": 1.380891732165765, + "learning_rate": 2.5870448460114994e-07, + "loss": 0.2650758624076843, + "step": 3770 + }, + { + "epoch": 1.8634622513283086, + "grad_norm": 1.4721649336836553, + "learning_rate": 2.568612618065036e-07, + "loss": 0.2364269644021988, + "step": 3771 + }, + { + "epoch": 1.8639565056221425, + "grad_norm": 1.2217358212004363, + "learning_rate": 2.5502454339206617e-07, + "loss": 0.23226915299892426, + "step": 3772 + }, + { + "epoch": 1.8644507599159768, + "grad_norm": 1.3407554644381927, + "learning_rate": 2.5319433058400565e-07, + "loss": 0.23077306151390076, + "step": 3773 + }, + { + "epoch": 1.864945014209811, + "grad_norm": 1.289395146095016, + "learning_rate": 2.5137062460414476e-07, + "loss": 0.23707103729248047, + "step": 3774 + }, + { + "epoch": 1.8654392685036452, + "grad_norm": 1.3571808886592325, + "learning_rate": 2.4955342666996505e-07, + "loss": 0.268571138381958, + "step": 3775 + }, + { + "epoch": 1.8659335227974791, + "grad_norm": 1.4298616373621023, + "learning_rate": 2.4774273799459847e-07, + "loss": 0.21469517052173615, + "step": 3776 + }, + { + "epoch": 1.8664277770913134, + "grad_norm": 1.302386517113681, + "learning_rate": 2.45938559786838e-07, + "loss": 0.2513999938964844, + "step": 3777 + }, + { + "epoch": 1.8669220313851476, + "grad_norm": 1.2688339559395354, + "learning_rate": 2.44140893251128e-07, + "loss": 0.23660680651664734, + "step": 3778 + }, + { + "epoch": 1.8674162856789818, + "grad_norm": 1.499995655954345, + "learning_rate": 2.423497395875618e-07, + "loss": 0.24594557285308838, + "step": 3779 + }, + { + "epoch": 1.867910539972816, + "grad_norm": 1.4315211319459857, + "learning_rate": 2.405650999918896e-07, + "loss": 0.2725435793399811, + "step": 3780 + }, + { + "epoch": 1.8684047942666502, + "grad_norm": 1.3565937935517103, + "learning_rate": 2.3878697565551167e-07, + "loss": 
0.25718316435813904, + "step": 3781 + }, + { + "epoch": 1.8688990485604844, + "grad_norm": 1.3523272274009415, + "learning_rate": 2.3701536776547851e-07, + "loss": 0.2546181082725525, + "step": 3782 + }, + { + "epoch": 1.8693933028543186, + "grad_norm": 1.1875597307843324, + "learning_rate": 2.3525027750448959e-07, + "loss": 0.22146770358085632, + "step": 3783 + }, + { + "epoch": 1.8698875571481528, + "grad_norm": 1.5616036933474096, + "learning_rate": 2.3349170605089456e-07, + "loss": 0.23873519897460938, + "step": 3784 + }, + { + "epoch": 1.870381811441987, + "grad_norm": 1.3056198220614723, + "learning_rate": 2.3173965457868875e-07, + "loss": 0.2530808746814728, + "step": 3785 + }, + { + "epoch": 1.8708760657358212, + "grad_norm": 1.5174642956273923, + "learning_rate": 2.2999412425751987e-07, + "loss": 0.21616236865520477, + "step": 3786 + }, + { + "epoch": 1.8713703200296554, + "grad_norm": 1.3867713509711206, + "learning_rate": 2.2825511625267583e-07, + "loss": 0.21596969664096832, + "step": 3787 + }, + { + "epoch": 1.8718645743234894, + "grad_norm": 1.4557650561795843, + "learning_rate": 2.265226317250957e-07, + "loss": 0.25873616337776184, + "step": 3788 + }, + { + "epoch": 1.8723588286173236, + "grad_norm": 1.3108065941801126, + "learning_rate": 2.247966718313599e-07, + "loss": 0.21096865832805634, + "step": 3789 + }, + { + "epoch": 1.8728530829111578, + "grad_norm": 1.374596799099242, + "learning_rate": 2.230772377236956e-07, + "loss": 0.2159111499786377, + "step": 3790 + }, + { + "epoch": 1.8733473372049918, + "grad_norm": 1.3658642346441578, + "learning_rate": 2.213643305499724e-07, + "loss": 0.2264566719532013, + "step": 3791 + }, + { + "epoch": 1.873841591498826, + "grad_norm": 1.2529368730648867, + "learning_rate": 2.1965795145370338e-07, + "loss": 0.216034397482872, + "step": 3792 + }, + { + "epoch": 1.8743358457926602, + "grad_norm": 1.2144868387665828, + "learning_rate": 2.1795810157404063e-07, + "loss": 0.22257745265960693, + "step": 3793 + }, + { 
+ "epoch": 1.8748301000864944, + "grad_norm": 1.5075158608293073, + "learning_rate": 2.1626478204578082e-07, + "loss": 0.2569161653518677, + "step": 3794 + }, + { + "epoch": 1.8753243543803286, + "grad_norm": 1.3028902539101006, + "learning_rate": 2.1457799399936087e-07, + "loss": 0.24172556400299072, + "step": 3795 + }, + { + "epoch": 1.8758186086741628, + "grad_norm": 1.4100197142967315, + "learning_rate": 2.128977385608555e-07, + "loss": 0.25539106130599976, + "step": 3796 + }, + { + "epoch": 1.876312862967997, + "grad_norm": 1.3564195764364628, + "learning_rate": 2.1122401685197747e-07, + "loss": 0.23766650259494781, + "step": 3797 + }, + { + "epoch": 1.8768071172618312, + "grad_norm": 2.0847437292387516, + "learning_rate": 2.095568299900841e-07, + "loss": 0.24102288484573364, + "step": 3798 + }, + { + "epoch": 1.8773013715556655, + "grad_norm": 1.4163898812472968, + "learning_rate": 2.0789617908816063e-07, + "loss": 0.25168395042419434, + "step": 3799 + }, + { + "epoch": 1.8777956258494997, + "grad_norm": 1.2853968722580162, + "learning_rate": 2.0624206525483582e-07, + "loss": 0.23417149484157562, + "step": 3800 + }, + { + "epoch": 1.8782898801433339, + "grad_norm": 1.4002834822702614, + "learning_rate": 2.04594489594373e-07, + "loss": 0.2875264883041382, + "step": 3801 + }, + { + "epoch": 1.878784134437168, + "grad_norm": 1.3714454637927955, + "learning_rate": 2.0295345320667014e-07, + "loss": 0.24828693270683289, + "step": 3802 + }, + { + "epoch": 1.879278388731002, + "grad_norm": 1.3521250596424406, + "learning_rate": 2.013189571872587e-07, + "loss": 0.23279064893722534, + "step": 3803 + }, + { + "epoch": 1.8797726430248363, + "grad_norm": 1.1425181629308492, + "learning_rate": 1.996910026273058e-07, + "loss": 0.2099420577287674, + "step": 3804 + }, + { + "epoch": 1.8802668973186705, + "grad_norm": 1.346362344532125, + "learning_rate": 1.9806959061360985e-07, + "loss": 0.25043174624443054, + "step": 3805 + }, + { + "epoch": 1.8807611516125047, + 
"grad_norm": 1.3680517059526944, + "learning_rate": 1.9645472222860286e-07, + "loss": 0.2606011927127838, + "step": 3806 + }, + { + "epoch": 1.8812554059063387, + "grad_norm": 1.2606250431650987, + "learning_rate": 1.948463985503468e-07, + "loss": 0.22487565875053406, + "step": 3807 + }, + { + "epoch": 1.8817496602001729, + "grad_norm": 1.6823729371263936, + "learning_rate": 1.9324462065253735e-07, + "loss": 0.29611343145370483, + "step": 3808 + }, + { + "epoch": 1.882243914494007, + "grad_norm": 1.282763458334529, + "learning_rate": 1.9164938960449685e-07, + "loss": 0.2301706224679947, + "step": 3809 + }, + { + "epoch": 1.8827381687878413, + "grad_norm": 1.319243063789466, + "learning_rate": 1.9006070647118015e-07, + "loss": 0.2306794822216034, + "step": 3810 + }, + { + "epoch": 1.8832324230816755, + "grad_norm": 1.4208055299495237, + "learning_rate": 1.884785723131688e-07, + "loss": 0.2588786482810974, + "step": 3811 + }, + { + "epoch": 1.8837266773755097, + "grad_norm": 1.527285475263959, + "learning_rate": 1.8690298818667463e-07, + "loss": 0.2795346677303314, + "step": 3812 + }, + { + "epoch": 1.884220931669344, + "grad_norm": 1.2499989201376016, + "learning_rate": 1.853339551435318e-07, + "loss": 0.2313271164894104, + "step": 3813 + }, + { + "epoch": 1.884715185963178, + "grad_norm": 1.4803115521216077, + "learning_rate": 1.8377147423120467e-07, + "loss": 0.22814632952213287, + "step": 3814 + }, + { + "epoch": 1.8852094402570123, + "grad_norm": 1.3259243101199787, + "learning_rate": 1.822155464927866e-07, + "loss": 0.2605836093425751, + "step": 3815 + }, + { + "epoch": 1.8857036945508465, + "grad_norm": 1.3976508324913761, + "learning_rate": 1.8066617296699007e-07, + "loss": 0.23902952671051025, + "step": 3816 + }, + { + "epoch": 1.8861979488446807, + "grad_norm": 1.290435692515394, + "learning_rate": 1.7912335468815545e-07, + "loss": 0.24895761907100677, + "step": 3817 + }, + { + "epoch": 1.8866922031385147, + "grad_norm": 1.4446135232841222, + 
"learning_rate": 1.7758709268624664e-07, + "loss": 0.24108648300170898, + "step": 3818 + }, + { + "epoch": 1.887186457432349, + "grad_norm": 1.4071508146495701, + "learning_rate": 1.7605738798684767e-07, + "loss": 0.2600073516368866, + "step": 3819 + }, + { + "epoch": 1.8876807117261831, + "grad_norm": 1.3261487318829528, + "learning_rate": 1.745342416111706e-07, + "loss": 0.21564190089702606, + "step": 3820 + }, + { + "epoch": 1.8881749660200173, + "grad_norm": 1.4577577895280622, + "learning_rate": 1.7301765457604647e-07, + "loss": 0.24080556631088257, + "step": 3821 + }, + { + "epoch": 1.8886692203138513, + "grad_norm": 1.316642170468449, + "learning_rate": 1.7150762789392316e-07, + "loss": 0.22631056606769562, + "step": 3822 + }, + { + "epoch": 1.8891634746076855, + "grad_norm": 1.4341533325292704, + "learning_rate": 1.7000416257287654e-07, + "loss": 0.26355087757110596, + "step": 3823 + }, + { + "epoch": 1.8896577289015197, + "grad_norm": 1.387410149780388, + "learning_rate": 1.685072596165982e-07, + "loss": 0.248369500041008, + "step": 3824 + }, + { + "epoch": 1.890151983195354, + "grad_norm": 1.4331472853704903, + "learning_rate": 1.670169200243976e-07, + "loss": 0.2789249122142792, + "step": 3825 + }, + { + "epoch": 1.8906462374891881, + "grad_norm": 1.2052406993380367, + "learning_rate": 1.6553314479120453e-07, + "loss": 0.22493675351142883, + "step": 3826 + }, + { + "epoch": 1.8911404917830223, + "grad_norm": 1.2074956449276386, + "learning_rate": 1.6405593490756766e-07, + "loss": 0.21274074912071228, + "step": 3827 + }, + { + "epoch": 1.8916347460768566, + "grad_norm": 1.3986179942656674, + "learning_rate": 1.6258529135964928e-07, + "loss": 0.2591193914413452, + "step": 3828 + }, + { + "epoch": 1.8921290003706908, + "grad_norm": 1.5077061888652343, + "learning_rate": 1.6112121512923075e-07, + "loss": 0.2791387140750885, + "step": 3829 + }, + { + "epoch": 1.892623254664525, + "grad_norm": 1.449596307066075, + "learning_rate": 1.5966370719371015e-07, + 
"loss": 0.2840545177459717, + "step": 3830 + }, + { + "epoch": 1.8931175089583592, + "grad_norm": 1.538114321399184, + "learning_rate": 1.582127685260948e-07, + "loss": 0.2563555836677551, + "step": 3831 + }, + { + "epoch": 1.8936117632521934, + "grad_norm": 1.2897284655116197, + "learning_rate": 1.5676840009501538e-07, + "loss": 0.22912704944610596, + "step": 3832 + }, + { + "epoch": 1.8941060175460276, + "grad_norm": 1.3733822665309192, + "learning_rate": 1.5533060286470837e-07, + "loss": 0.25490787625312805, + "step": 3833 + }, + { + "epoch": 1.8946002718398616, + "grad_norm": 1.2282031018618578, + "learning_rate": 1.5389937779502818e-07, + "loss": 0.21826709806919098, + "step": 3834 + }, + { + "epoch": 1.8950945261336958, + "grad_norm": 1.303626845787231, + "learning_rate": 1.524747258414394e-07, + "loss": 0.2292749583721161, + "step": 3835 + }, + { + "epoch": 1.89558878042753, + "grad_norm": 1.3359905611934206, + "learning_rate": 1.5105664795501908e-07, + "loss": 0.24652332067489624, + "step": 3836 + }, + { + "epoch": 1.8960830347213642, + "grad_norm": 1.3777956922677133, + "learning_rate": 1.4964514508245652e-07, + "loss": 0.25154706835746765, + "step": 3837 + }, + { + "epoch": 1.8965772890151982, + "grad_norm": 1.3722697572324272, + "learning_rate": 1.482402181660525e-07, + "loss": 0.2414158582687378, + "step": 3838 + }, + { + "epoch": 1.8970715433090324, + "grad_norm": 1.3960215733148371, + "learning_rate": 1.4684186814371225e-07, + "loss": 0.22421908378601074, + "step": 3839 + }, + { + "epoch": 1.8975657976028666, + "grad_norm": 1.3337706977662172, + "learning_rate": 1.4545009594895687e-07, + "loss": 0.2506029009819031, + "step": 3840 + }, + { + "epoch": 1.8980600518967008, + "grad_norm": 1.239516400526973, + "learning_rate": 1.440649025109142e-07, + "loss": 0.2011726200580597, + "step": 3841 + }, + { + "epoch": 1.898554306190535, + "grad_norm": 1.5242598019660087, + "learning_rate": 1.4268628875431677e-07, + "loss": 0.27702796459198, + "step": 3842 + }, + 
{ + "epoch": 1.8990485604843692, + "grad_norm": 1.236260659855922, + "learning_rate": 1.413142555995095e-07, + "loss": 0.23884715139865875, + "step": 3843 + }, + { + "epoch": 1.8995428147782034, + "grad_norm": 1.2385068593263413, + "learning_rate": 1.3994880396244304e-07, + "loss": 0.2191702425479889, + "step": 3844 + }, + { + "epoch": 1.9000370690720376, + "grad_norm": 1.3532676134331167, + "learning_rate": 1.385899347546704e-07, + "loss": 0.25425833463668823, + "step": 3845 + }, + { + "epoch": 1.9005313233658718, + "grad_norm": 1.3452712776781028, + "learning_rate": 1.37237648883356e-07, + "loss": 0.23355990648269653, + "step": 3846 + }, + { + "epoch": 1.901025577659706, + "grad_norm": 1.200878562022238, + "learning_rate": 1.3589194725126542e-07, + "loss": 0.2079685628414154, + "step": 3847 + }, + { + "epoch": 1.9015198319535402, + "grad_norm": 1.380798956497921, + "learning_rate": 1.3455283075676895e-07, + "loss": 0.25126928091049194, + "step": 3848 + }, + { + "epoch": 1.9020140862473742, + "grad_norm": 1.3306751541769635, + "learning_rate": 1.332203002938437e-07, + "loss": 0.2608864903450012, + "step": 3849 + }, + { + "epoch": 1.9025083405412084, + "grad_norm": 1.3536846944777874, + "learning_rate": 1.3189435675206697e-07, + "loss": 0.27048414945602417, + "step": 3850 + }, + { + "epoch": 1.9030025948350426, + "grad_norm": 1.3873264194773522, + "learning_rate": 1.3057500101661846e-07, + "loss": 0.24350577592849731, + "step": 3851 + }, + { + "epoch": 1.9034968491288768, + "grad_norm": 1.5060374095399143, + "learning_rate": 1.2926223396828363e-07, + "loss": 0.23283880949020386, + "step": 3852 + }, + { + "epoch": 1.9039911034227108, + "grad_norm": 1.3722502195381412, + "learning_rate": 1.2795605648344477e-07, + "loss": 0.23332493007183075, + "step": 3853 + }, + { + "epoch": 1.904485357716545, + "grad_norm": 1.2805992535782373, + "learning_rate": 1.2665646943408882e-07, + "loss": 0.19833901524543762, + "step": 3854 + }, + { + "epoch": 1.9049796120103792, + 
"grad_norm": 1.316108497317141, + "learning_rate": 1.2536347368780066e-07, + "loss": 0.23650333285331726, + "step": 3855 + }, + { + "epoch": 1.9054738663042134, + "grad_norm": 1.1749486485284195, + "learning_rate": 1.240770701077665e-07, + "loss": 0.20151859521865845, + "step": 3856 + }, + { + "epoch": 1.9059681205980477, + "grad_norm": 1.4620220273758984, + "learning_rate": 1.2279725955277044e-07, + "loss": 0.32347559928894043, + "step": 3857 + }, + { + "epoch": 1.9064623748918819, + "grad_norm": 1.2726582104041342, + "learning_rate": 1.215240428771969e-07, + "loss": 0.25937923789024353, + "step": 3858 + }, + { + "epoch": 1.906956629185716, + "grad_norm": 1.6959402751075685, + "learning_rate": 1.2025742093102477e-07, + "loss": 0.2648822069168091, + "step": 3859 + }, + { + "epoch": 1.9074508834795503, + "grad_norm": 1.4639245582336404, + "learning_rate": 1.1899739455983327e-07, + "loss": 0.27612054347991943, + "step": 3860 + }, + { + "epoch": 1.9079451377733845, + "grad_norm": 1.32342317481008, + "learning_rate": 1.1774396460480064e-07, + "loss": 0.2204264998435974, + "step": 3861 + }, + { + "epoch": 1.9084393920672187, + "grad_norm": 1.4448526349141402, + "learning_rate": 1.164971319026964e-07, + "loss": 0.2719968557357788, + "step": 3862 + }, + { + "epoch": 1.908933646361053, + "grad_norm": 1.3288093626980793, + "learning_rate": 1.1525689728588807e-07, + "loss": 0.2308243364095688, + "step": 3863 + }, + { + "epoch": 1.909427900654887, + "grad_norm": 1.405242953564276, + "learning_rate": 1.1402326158234e-07, + "loss": 0.23281638324260712, + "step": 3864 + }, + { + "epoch": 1.909922154948721, + "grad_norm": 1.553800687505842, + "learning_rate": 1.127962256156101e-07, + "loss": 0.26273444294929504, + "step": 3865 + }, + { + "epoch": 1.9104164092425553, + "grad_norm": 1.3311046226223713, + "learning_rate": 1.1157579020484755e-07, + "loss": 0.26783496141433716, + "step": 3866 + }, + { + "epoch": 1.9109106635363895, + "grad_norm": 1.4482920311066827, + "learning_rate": 
1.1036195616480061e-07, + "loss": 0.2575075626373291, + "step": 3867 + }, + { + "epoch": 1.9114049178302237, + "grad_norm": 1.3313207733281058, + "learning_rate": 1.0915472430580443e-07, + "loss": 0.24802085757255554, + "step": 3868 + }, + { + "epoch": 1.9118991721240577, + "grad_norm": 1.230518560175702, + "learning_rate": 1.0795409543379099e-07, + "loss": 0.22017821669578552, + "step": 3869 + }, + { + "epoch": 1.912393426417892, + "grad_norm": 1.3804831257002024, + "learning_rate": 1.0676007035028579e-07, + "loss": 0.2525743246078491, + "step": 3870 + }, + { + "epoch": 1.912887680711726, + "grad_norm": 1.5674388988470875, + "learning_rate": 1.05572649852399e-07, + "loss": 0.26704782247543335, + "step": 3871 + }, + { + "epoch": 1.9133819350055603, + "grad_norm": 3.430480948746706, + "learning_rate": 1.0439183473283654e-07, + "loss": 0.25393134355545044, + "step": 3872 + }, + { + "epoch": 1.9138761892993945, + "grad_norm": 1.4465108879454651, + "learning_rate": 1.0321762577989448e-07, + "loss": 0.27266988158226013, + "step": 3873 + }, + { + "epoch": 1.9143704435932287, + "grad_norm": 1.366912603525092, + "learning_rate": 1.0205002377745799e-07, + "loss": 0.2694425582885742, + "step": 3874 + }, + { + "epoch": 1.914864697887063, + "grad_norm": 1.394500016346508, + "learning_rate": 1.0088902950500023e-07, + "loss": 0.28820598125457764, + "step": 3875 + }, + { + "epoch": 1.9153589521808971, + "grad_norm": 1.3050023577266547, + "learning_rate": 9.973464373758679e-08, + "loss": 0.2194051444530487, + "step": 3876 + }, + { + "epoch": 1.9158532064747313, + "grad_norm": 1.3831603392475145, + "learning_rate": 9.858686724586675e-08, + "loss": 0.25639402866363525, + "step": 3877 + }, + { + "epoch": 1.9163474607685655, + "grad_norm": 1.2744346736321277, + "learning_rate": 9.744570079608051e-08, + "loss": 0.23420584201812744, + "step": 3878 + }, + { + "epoch": 1.9168417150623998, + "grad_norm": 1.38639151316596, + "learning_rate": 9.631114515005425e-08, + "loss": 
0.2514578700065613, + "step": 3879 + }, + { + "epoch": 1.9173359693562337, + "grad_norm": 1.296540814966686, + "learning_rate": 9.518320106520096e-08, + "loss": 0.2223532646894455, + "step": 3880 + }, + { + "epoch": 1.917830223650068, + "grad_norm": 1.367450022954602, + "learning_rate": 9.406186929451943e-08, + "loss": 0.21725934743881226, + "step": 3881 + }, + { + "epoch": 1.9183244779439022, + "grad_norm": 1.2939049219304557, + "learning_rate": 9.294715058659531e-08, + "loss": 0.2081519365310669, + "step": 3882 + }, + { + "epoch": 1.9188187322377364, + "grad_norm": 1.4148048553245687, + "learning_rate": 9.183904568559998e-08, + "loss": 0.23683780431747437, + "step": 3883 + }, + { + "epoch": 1.9193129865315703, + "grad_norm": 1.3217345576155297, + "learning_rate": 9.073755533128725e-08, + "loss": 0.26095467805862427, + "step": 3884 + }, + { + "epoch": 1.9198072408254045, + "grad_norm": 1.253461281568054, + "learning_rate": 8.964268025899558e-08, + "loss": 0.24427568912506104, + "step": 3885 + }, + { + "epoch": 1.9203014951192388, + "grad_norm": 1.3603609343742546, + "learning_rate": 8.855442119964919e-08, + "loss": 0.23549365997314453, + "step": 3886 + }, + { + "epoch": 1.920795749413073, + "grad_norm": 1.4769071310965274, + "learning_rate": 8.74727788797547e-08, + "loss": 0.2645740807056427, + "step": 3887 + }, + { + "epoch": 1.9212900037069072, + "grad_norm": 1.3315198325383535, + "learning_rate": 8.639775402139894e-08, + "loss": 0.22890612483024597, + "step": 3888 + }, + { + "epoch": 1.9217842580007414, + "grad_norm": 1.4439303401955232, + "learning_rate": 8.532934734225451e-08, + "loss": 0.23417067527770996, + "step": 3889 + }, + { + "epoch": 1.9222785122945756, + "grad_norm": 1.3482339584478593, + "learning_rate": 8.42675595555753e-08, + "loss": 0.26125872135162354, + "step": 3890 + }, + { + "epoch": 1.9227727665884098, + "grad_norm": 1.4420298418522868, + "learning_rate": 8.321239137019433e-08, + "loss": 0.26559343934059143, + "step": 3891 + }, + { + 
"epoch": 1.923267020882244, + "grad_norm": 1.188066329993037, + "learning_rate": 8.216384349052809e-08, + "loss": 0.2033136785030365, + "step": 3892 + }, + { + "epoch": 1.9237612751760782, + "grad_norm": 1.975689815636208, + "learning_rate": 8.112191661656999e-08, + "loss": 0.2750868797302246, + "step": 3893 + }, + { + "epoch": 1.9242555294699124, + "grad_norm": 1.366292176712638, + "learning_rate": 8.008661144389807e-08, + "loss": 0.2082993984222412, + "step": 3894 + }, + { + "epoch": 1.9247497837637466, + "grad_norm": 1.4608755297303442, + "learning_rate": 7.905792866366501e-08, + "loss": 0.2495439350605011, + "step": 3895 + }, + { + "epoch": 1.9252440380575806, + "grad_norm": 1.4141233844295813, + "learning_rate": 7.803586896260707e-08, + "loss": 0.25609591603279114, + "step": 3896 + }, + { + "epoch": 1.9257382923514148, + "grad_norm": 1.5334004898395663, + "learning_rate": 7.702043302303397e-08, + "loss": 0.25372135639190674, + "step": 3897 + }, + { + "epoch": 1.926232546645249, + "grad_norm": 1.3368221554281705, + "learning_rate": 7.601162152283904e-08, + "loss": 0.21882784366607666, + "step": 3898 + }, + { + "epoch": 1.926726800939083, + "grad_norm": 1.5284992426615736, + "learning_rate": 7.500943513548797e-08, + "loss": 0.24513296782970428, + "step": 3899 + }, + { + "epoch": 1.9272210552329172, + "grad_norm": 1.3036631509681367, + "learning_rate": 7.401387453002673e-08, + "loss": 0.23508042097091675, + "step": 3900 + } + ], + "logging_steps": 1, + "max_steps": 4048, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2954647833083904.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3900/training_args.bin b/checkpoint-3900/training_args.bin new 
file mode 100644 index 0000000000000000000000000000000000000000..dfe3e09693106b888d9a74120f900fc466890d4c --- /dev/null +++ b/checkpoint-3900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0dfb10ba35de856be3ab9b2b044348b5752efc43fb83f0d6e71a782894a3001 +size 6968 diff --git a/checkpoint-3900/zero_to_fp32.py b/checkpoint-3900/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-3900/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-4000/README.md b/checkpoint-4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-4000/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-4000/adapter_config.json b/checkpoint-4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6cf16f3266f5592ee03447db73cafc0bd600786e --- /dev/null +++ b/checkpoint-4000/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.25.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.13.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.27.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.4.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.11.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.15.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.3.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.6.mlp.up_proj", + "o_proj", + "layers.24.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.18.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.8.mlp.gate_proj", + "layers.13.mlp.gate_proj", + "layers.11.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.7.mlp.gate_proj", + "layers.17.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.18.mlp.gate_proj", + "k_proj", + "layers.9.mlp.gate_proj", + "layers.23.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "v_proj", + "layers.6.mlp.down_proj", + "q_proj", + "layers.27.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.23.mlp.up_proj", + 
"layers.17.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.0.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.16.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.1.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.24.mlp.up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-4000/adapter_model.safetensors b/checkpoint-4000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bfd66828d42bbe206f04abfc548a0c2371e431a --- /dev/null +++ b/checkpoint-4000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4fbd2bfefb9ab29431b7517116459475e58aabd5b58190f97023dc1541cb57 +size 323020440 diff --git a/checkpoint-4000/chat_template.jinja b/checkpoint-4000/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-4000/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set 
video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-4000/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-4000/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1e18ba199a3fa2ecea28d3b3305370b4ae851e5 --- /dev/null +++ b/checkpoint-4000/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34d2bd807fb2d307735e97950128d60460fc045616f6d34f62aff782a2c23d8 +size 1937772272 diff --git a/checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f70312a27db738dff18ad93a545f21435671a43 --- /dev/null +++ b/checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5115227bece3fb1941e5628ae397e470139027a62c54056e640316573f7632 +size 460630 diff --git a/checkpoint-4000/latest b/checkpoint-4000/latest new file mode 100644 index 0000000000000000000000000000000000000000..641c40ce8d5e05e04a0840d69bcc5fac045a3fbd --- /dev/null +++ b/checkpoint-4000/latest @@ -0,0 +1 @@ +global_step4000 \ No newline at end of file diff --git a/checkpoint-4000/processor_config.json b/checkpoint-4000/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-4000/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8554c2f926c896219822e49b7b7cf2f8c8accfc4 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4358983805f459003c5efe99494000fbff896cb65cf473004262c304fdebe8 +size 14244 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd1e070dec4dfe4c473152efd5854fe454757c25 --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa469e18f6d42a74d14a921d9941c33ec6ee82226c7ee35f98c0370ef87fa1e +size 
1000 diff --git a/checkpoint-4000/tokenizer.json b/checkpoint-4000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-4000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-4000/tokenizer_config.json b/checkpoint-4000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-4000/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9024a9c6534ba4c19c9466ef1ed04d497b4337d2 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,28034 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9766464846163352, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0004942542938341777, + "grad_norm": 
1.7827389996067007, + "learning_rate": 0.0, + "loss": 1.1816105842590332, + "step": 1 + }, + { + "epoch": 0.0009885085876683553, + "grad_norm": 1.891128580111598, + "learning_rate": 9.852216748768474e-08, + "loss": 1.1496102809906006, + "step": 2 + }, + { + "epoch": 0.001482762881502533, + "grad_norm": 1.8581340535316004, + "learning_rate": 1.9704433497536947e-07, + "loss": 1.1515967845916748, + "step": 3 + }, + { + "epoch": 0.0019770171753367106, + "grad_norm": 1.708604556953044, + "learning_rate": 2.955665024630542e-07, + "loss": 1.1795943975448608, + "step": 4 + }, + { + "epoch": 0.0024712714691708885, + "grad_norm": 1.8513528590958555, + "learning_rate": 3.9408866995073894e-07, + "loss": 1.2289564609527588, + "step": 5 + }, + { + "epoch": 0.002965525763005066, + "grad_norm": 1.972324289049384, + "learning_rate": 4.926108374384237e-07, + "loss": 1.179269790649414, + "step": 6 + }, + { + "epoch": 0.003459780056839244, + "grad_norm": 1.8334156798400192, + "learning_rate": 5.911330049261084e-07, + "loss": 1.199608564376831, + "step": 7 + }, + { + "epoch": 0.003954034350673421, + "grad_norm": 1.6669436389627912, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1643707752227783, + "step": 8 + }, + { + "epoch": 0.004448288644507599, + "grad_norm": 1.8750060934609654, + "learning_rate": 7.881773399014779e-07, + "loss": 1.1264240741729736, + "step": 9 + }, + { + "epoch": 0.004942542938341777, + "grad_norm": 1.9962482953672744, + "learning_rate": 8.866995073891626e-07, + "loss": 1.1717555522918701, + "step": 10 + }, + { + "epoch": 0.005436797232175955, + "grad_norm": 1.895693583554434, + "learning_rate": 9.852216748768474e-07, + "loss": 1.1856712102890015, + "step": 11 + }, + { + "epoch": 0.005931051526010132, + "grad_norm": 1.7765248738469863, + "learning_rate": 1.0837438423645322e-06, + "loss": 1.1258785724639893, + "step": 12 + }, + { + "epoch": 0.00642530581984431, + "grad_norm": 1.8326605479421993, + "learning_rate": 1.1822660098522167e-06, + "loss": 
1.1333656311035156, + "step": 13 + }, + { + "epoch": 0.006919560113678488, + "grad_norm": 1.9142537067819894, + "learning_rate": 1.2807881773399017e-06, + "loss": 1.2281363010406494, + "step": 14 + }, + { + "epoch": 0.0074138144075126654, + "grad_norm": 1.9232318367357442, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1910676956176758, + "step": 15 + }, + { + "epoch": 0.007908068701346842, + "grad_norm": 2.5599273269087885, + "learning_rate": 1.4778325123152712e-06, + "loss": 1.2124552726745605, + "step": 16 + }, + { + "epoch": 0.008402322995181021, + "grad_norm": 2.2109761155287133, + "learning_rate": 1.5763546798029558e-06, + "loss": 1.1993463039398193, + "step": 17 + }, + { + "epoch": 0.008896577289015198, + "grad_norm": 2.1999117305307077, + "learning_rate": 1.6748768472906405e-06, + "loss": 1.1245683431625366, + "step": 18 + }, + { + "epoch": 0.009390831582849375, + "grad_norm": 2.203478389299074, + "learning_rate": 1.7733990147783253e-06, + "loss": 1.1838568449020386, + "step": 19 + }, + { + "epoch": 0.009885085876683554, + "grad_norm": 2.419107047950166, + "learning_rate": 1.8719211822660098e-06, + "loss": 1.081169843673706, + "step": 20 + }, + { + "epoch": 0.010379340170517731, + "grad_norm": 2.559921706815215, + "learning_rate": 1.970443349753695e-06, + "loss": 1.1506569385528564, + "step": 21 + }, + { + "epoch": 0.01087359446435191, + "grad_norm": 2.8697838151244977, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0841327905654907, + "step": 22 + }, + { + "epoch": 0.011367848758186087, + "grad_norm": 2.8012936510978905, + "learning_rate": 2.1674876847290643e-06, + "loss": 1.1335525512695312, + "step": 23 + }, + { + "epoch": 0.011862103052020264, + "grad_norm": 2.649521736906966, + "learning_rate": 2.266009852216749e-06, + "loss": 1.035188913345337, + "step": 24 + }, + { + "epoch": 0.012356357345854442, + "grad_norm": 2.7385314170591166, + "learning_rate": 2.3645320197044334e-06, + "loss": 1.0640877485275269, + "step": 25 + }, + { + 
"epoch": 0.01285061163968862, + "grad_norm": 2.5011806151261755, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.0479273796081543, + "step": 26 + }, + { + "epoch": 0.013344865933522798, + "grad_norm": 2.236670838822209, + "learning_rate": 2.5615763546798034e-06, + "loss": 1.0522505044937134, + "step": 27 + }, + { + "epoch": 0.013839120227356975, + "grad_norm": 2.065544668093392, + "learning_rate": 2.660098522167488e-06, + "loss": 1.080836296081543, + "step": 28 + }, + { + "epoch": 0.014333374521191152, + "grad_norm": 1.7478242928012908, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9712544679641724, + "step": 29 + }, + { + "epoch": 0.014827628815025331, + "grad_norm": 1.5930614486695707, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.0469061136245728, + "step": 30 + }, + { + "epoch": 0.015321883108859508, + "grad_norm": 1.380137621152324, + "learning_rate": 2.9556650246305424e-06, + "loss": 0.9911116361618042, + "step": 31 + }, + { + "epoch": 0.015816137402693685, + "grad_norm": 1.3167918112915387, + "learning_rate": 3.054187192118227e-06, + "loss": 0.9552959203720093, + "step": 32 + }, + { + "epoch": 0.016310391696527864, + "grad_norm": 1.2266567383194062, + "learning_rate": 3.1527093596059115e-06, + "loss": 0.957429051399231, + "step": 33 + }, + { + "epoch": 0.016804645990362042, + "grad_norm": 1.305011449405004, + "learning_rate": 3.2512315270935963e-06, + "loss": 1.0180628299713135, + "step": 34 + }, + { + "epoch": 0.017298900284196218, + "grad_norm": 1.2347397961596738, + "learning_rate": 3.349753694581281e-06, + "loss": 0.9064415097236633, + "step": 35 + }, + { + "epoch": 0.017793154578030396, + "grad_norm": 1.216758814553776, + "learning_rate": 3.448275862068966e-06, + "loss": 0.9718184471130371, + "step": 36 + }, + { + "epoch": 0.018287408871864575, + "grad_norm": 1.065779121444896, + "learning_rate": 3.5467980295566506e-06, + "loss": 0.8831444978713989, + "step": 37 + }, + { + "epoch": 0.01878166316569875, + "grad_norm": 
1.0132491929086573, + "learning_rate": 3.6453201970443354e-06, + "loss": 0.9167139530181885, + "step": 38 + }, + { + "epoch": 0.01927591745953293, + "grad_norm": 1.0431186403983612, + "learning_rate": 3.7438423645320197e-06, + "loss": 0.9322037696838379, + "step": 39 + }, + { + "epoch": 0.019770171753367108, + "grad_norm": 1.0319066435292568, + "learning_rate": 3.842364532019705e-06, + "loss": 0.9189817905426025, + "step": 40 + }, + { + "epoch": 0.020264426047201287, + "grad_norm": 1.1670657884595383, + "learning_rate": 3.94088669950739e-06, + "loss": 0.8480448126792908, + "step": 41 + }, + { + "epoch": 0.020758680341035462, + "grad_norm": 0.9850175889441174, + "learning_rate": 4.039408866995074e-06, + "loss": 0.8907301425933838, + "step": 42 + }, + { + "epoch": 0.02125293463486964, + "grad_norm": 1.0028387912933743, + "learning_rate": 4.137931034482759e-06, + "loss": 0.8674390316009521, + "step": 43 + }, + { + "epoch": 0.02174718892870382, + "grad_norm": 0.9822966394815191, + "learning_rate": 4.236453201970444e-06, + "loss": 0.8674882054328918, + "step": 44 + }, + { + "epoch": 0.022241443222537995, + "grad_norm": 0.9778327665239519, + "learning_rate": 4.334975369458129e-06, + "loss": 0.8542560338973999, + "step": 45 + }, + { + "epoch": 0.022735697516372173, + "grad_norm": 0.8621828386281931, + "learning_rate": 4.4334975369458135e-06, + "loss": 0.772778332233429, + "step": 46 + }, + { + "epoch": 0.023229951810206352, + "grad_norm": 0.8638093364937629, + "learning_rate": 4.532019704433498e-06, + "loss": 0.7481152415275574, + "step": 47 + }, + { + "epoch": 0.023724206104040527, + "grad_norm": 0.8467972866728939, + "learning_rate": 4.630541871921182e-06, + "loss": 0.8373709917068481, + "step": 48 + }, + { + "epoch": 0.024218460397874706, + "grad_norm": 0.8165134857986008, + "learning_rate": 4.729064039408867e-06, + "loss": 0.8163385391235352, + "step": 49 + }, + { + "epoch": 0.024712714691708885, + "grad_norm": 0.833026336683437, + "learning_rate": 
4.8275862068965525e-06, + "loss": 0.7444975972175598, + "step": 50 + }, + { + "epoch": 0.025206968985543064, + "grad_norm": 0.858591041664589, + "learning_rate": 4.926108374384237e-06, + "loss": 0.7683243751525879, + "step": 51 + }, + { + "epoch": 0.02570122327937724, + "grad_norm": 1.0127725906591662, + "learning_rate": 5.024630541871922e-06, + "loss": 0.806761622428894, + "step": 52 + }, + { + "epoch": 0.026195477573211418, + "grad_norm": 0.8333649125881921, + "learning_rate": 5.123152709359607e-06, + "loss": 0.7312102913856506, + "step": 53 + }, + { + "epoch": 0.026689731867045596, + "grad_norm": 0.9425883709792775, + "learning_rate": 5.2216748768472915e-06, + "loss": 0.7351999282836914, + "step": 54 + }, + { + "epoch": 0.02718398616087977, + "grad_norm": 0.9039627787948463, + "learning_rate": 5.320197044334976e-06, + "loss": 0.7453763484954834, + "step": 55 + }, + { + "epoch": 0.02767824045471395, + "grad_norm": 0.9324665454088699, + "learning_rate": 5.41871921182266e-06, + "loss": 0.7063292860984802, + "step": 56 + }, + { + "epoch": 0.02817249474854813, + "grad_norm": 0.8343256198457882, + "learning_rate": 5.517241379310345e-06, + "loss": 0.7145994901657104, + "step": 57 + }, + { + "epoch": 0.028666749042382304, + "grad_norm": 0.7157092163314197, + "learning_rate": 5.61576354679803e-06, + "loss": 0.687594473361969, + "step": 58 + }, + { + "epoch": 0.029161003336216483, + "grad_norm": 0.7603582128739335, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.6643895506858826, + "step": 59 + }, + { + "epoch": 0.029655257630050662, + "grad_norm": 0.6925073694472516, + "learning_rate": 5.812807881773399e-06, + "loss": 0.6781614422798157, + "step": 60 + }, + { + "epoch": 0.030149511923884837, + "grad_norm": 0.7169709854131228, + "learning_rate": 5.911330049261085e-06, + "loss": 0.6209158301353455, + "step": 61 + }, + { + "epoch": 0.030643766217719016, + "grad_norm": 0.6749920715098945, + "learning_rate": 6.00985221674877e-06, + "loss": 0.6424679756164551, + "step": 
62 + }, + { + "epoch": 0.031138020511553195, + "grad_norm": 0.6435584468821339, + "learning_rate": 6.108374384236454e-06, + "loss": 0.6745971441268921, + "step": 63 + }, + { + "epoch": 0.03163227480538737, + "grad_norm": 0.657544191989632, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6520330905914307, + "step": 64 + }, + { + "epoch": 0.03212652909922155, + "grad_norm": 0.6351335823908374, + "learning_rate": 6.305418719211823e-06, + "loss": 0.6790571212768555, + "step": 65 + }, + { + "epoch": 0.03262078339305573, + "grad_norm": 0.6484215339353426, + "learning_rate": 6.403940886699508e-06, + "loss": 0.6491506099700928, + "step": 66 + }, + { + "epoch": 0.033115037686889906, + "grad_norm": 0.617685895397393, + "learning_rate": 6.502463054187193e-06, + "loss": 0.6347313523292542, + "step": 67 + }, + { + "epoch": 0.033609291980724085, + "grad_norm": 0.6638567270691007, + "learning_rate": 6.600985221674877e-06, + "loss": 0.6785881519317627, + "step": 68 + }, + { + "epoch": 0.034103546274558263, + "grad_norm": 0.6459369268846485, + "learning_rate": 6.699507389162562e-06, + "loss": 0.6470085978507996, + "step": 69 + }, + { + "epoch": 0.034597800568392435, + "grad_norm": 0.6364523697931875, + "learning_rate": 6.798029556650246e-06, + "loss": 0.6205961108207703, + "step": 70 + }, + { + "epoch": 0.035092054862226614, + "grad_norm": 0.6434045969551643, + "learning_rate": 6.896551724137932e-06, + "loss": 0.6621580123901367, + "step": 71 + }, + { + "epoch": 0.03558630915606079, + "grad_norm": 0.6281362500041567, + "learning_rate": 6.995073891625616e-06, + "loss": 0.6363088488578796, + "step": 72 + }, + { + "epoch": 0.03608056344989497, + "grad_norm": 0.6023389614758552, + "learning_rate": 7.093596059113301e-06, + "loss": 0.6073004007339478, + "step": 73 + }, + { + "epoch": 0.03657481774372915, + "grad_norm": 0.5962790573618366, + "learning_rate": 7.192118226600986e-06, + "loss": 0.6490880846977234, + "step": 74 + }, + { + "epoch": 0.03706907203756333, + "grad_norm": 
0.6425224117743127, + "learning_rate": 7.290640394088671e-06, + "loss": 0.6540624499320984, + "step": 75 + }, + { + "epoch": 0.0375633263313975, + "grad_norm": 0.6885040620745063, + "learning_rate": 7.3891625615763555e-06, + "loss": 0.6237976551055908, + "step": 76 + }, + { + "epoch": 0.03805758062523168, + "grad_norm": 0.6110947192931153, + "learning_rate": 7.487684729064039e-06, + "loss": 0.6121219992637634, + "step": 77 + }, + { + "epoch": 0.03855183491906586, + "grad_norm": 0.6031847840211293, + "learning_rate": 7.586206896551724e-06, + "loss": 0.5785888433456421, + "step": 78 + }, + { + "epoch": 0.03904608921290004, + "grad_norm": 0.645073431050071, + "learning_rate": 7.68472906403941e-06, + "loss": 0.6144810914993286, + "step": 79 + }, + { + "epoch": 0.039540343506734216, + "grad_norm": 0.709404375816405, + "learning_rate": 7.783251231527095e-06, + "loss": 0.6522500514984131, + "step": 80 + }, + { + "epoch": 0.040034597800568394, + "grad_norm": 0.6784602446095636, + "learning_rate": 7.88177339901478e-06, + "loss": 0.6126501560211182, + "step": 81 + }, + { + "epoch": 0.04052885209440257, + "grad_norm": 0.6834338295248128, + "learning_rate": 7.980295566502464e-06, + "loss": 0.573388934135437, + "step": 82 + }, + { + "epoch": 0.041023106388236745, + "grad_norm": 0.7128627750045655, + "learning_rate": 8.078817733990149e-06, + "loss": 0.6462322473526001, + "step": 83 + }, + { + "epoch": 0.041517360682070924, + "grad_norm": 0.6985575396830678, + "learning_rate": 8.177339901477834e-06, + "loss": 0.6542905569076538, + "step": 84 + }, + { + "epoch": 0.0420116149759051, + "grad_norm": 0.6800738258763197, + "learning_rate": 8.275862068965518e-06, + "loss": 0.6539976000785828, + "step": 85 + }, + { + "epoch": 0.04250586926973928, + "grad_norm": 0.6805451756514653, + "learning_rate": 8.374384236453203e-06, + "loss": 0.6303049325942993, + "step": 86 + }, + { + "epoch": 0.04300012356357346, + "grad_norm": 0.6262637687675628, + "learning_rate": 8.472906403940888e-06, + 
"loss": 0.5727078318595886, + "step": 87 + }, + { + "epoch": 0.04349437785740764, + "grad_norm": 0.6392194157453778, + "learning_rate": 8.571428571428571e-06, + "loss": 0.6204914450645447, + "step": 88 + }, + { + "epoch": 0.04398863215124181, + "grad_norm": 0.8144620373591464, + "learning_rate": 8.669950738916257e-06, + "loss": 0.633359432220459, + "step": 89 + }, + { + "epoch": 0.04448288644507599, + "grad_norm": 0.6564252660453104, + "learning_rate": 8.768472906403942e-06, + "loss": 0.5737719535827637, + "step": 90 + }, + { + "epoch": 0.04497714073891017, + "grad_norm": 0.704224097621618, + "learning_rate": 8.866995073891627e-06, + "loss": 0.6438707709312439, + "step": 91 + }, + { + "epoch": 0.04547139503274435, + "grad_norm": 0.7123681566966987, + "learning_rate": 8.965517241379312e-06, + "loss": 0.6284823417663574, + "step": 92 + }, + { + "epoch": 0.045965649326578525, + "grad_norm": 0.6879682376399587, + "learning_rate": 9.064039408866996e-06, + "loss": 0.6442058086395264, + "step": 93 + }, + { + "epoch": 0.046459903620412704, + "grad_norm": 0.709934515039082, + "learning_rate": 9.162561576354681e-06, + "loss": 0.5821751356124878, + "step": 94 + }, + { + "epoch": 0.04695415791424688, + "grad_norm": 1.530236961676562, + "learning_rate": 9.261083743842364e-06, + "loss": 0.546042263507843, + "step": 95 + }, + { + "epoch": 0.047448412208081055, + "grad_norm": 0.6844457378175872, + "learning_rate": 9.359605911330049e-06, + "loss": 0.5743244886398315, + "step": 96 + }, + { + "epoch": 0.04794266650191523, + "grad_norm": 0.6876016450255833, + "learning_rate": 9.458128078817734e-06, + "loss": 0.5775831341743469, + "step": 97 + }, + { + "epoch": 0.04843692079574941, + "grad_norm": 0.6367125491834975, + "learning_rate": 9.55665024630542e-06, + "loss": 0.5632016658782959, + "step": 98 + }, + { + "epoch": 0.04893117508958359, + "grad_norm": 0.635357516984843, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5817564129829407, + "step": 99 + }, + { + "epoch": 
0.04942542938341777, + "grad_norm": 0.6380730461382318, + "learning_rate": 9.75369458128079e-06, + "loss": 0.5692225098609924, + "step": 100 + }, + { + "epoch": 0.04991968367725195, + "grad_norm": 0.6016319910280624, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5239434242248535, + "step": 101 + }, + { + "epoch": 0.05041393797108613, + "grad_norm": 0.6757811368400487, + "learning_rate": 9.95073891625616e-06, + "loss": 0.543138861656189, + "step": 102 + }, + { + "epoch": 0.0509081922649203, + "grad_norm": 0.6907500926239555, + "learning_rate": 1.0049261083743844e-05, + "loss": 0.5914052128791809, + "step": 103 + }, + { + "epoch": 0.05140244655875448, + "grad_norm": 0.657964391130701, + "learning_rate": 1.0147783251231529e-05, + "loss": 0.5394442081451416, + "step": 104 + }, + { + "epoch": 0.051896700852588656, + "grad_norm": 0.6411875370567456, + "learning_rate": 1.0246305418719214e-05, + "loss": 0.6157902479171753, + "step": 105 + }, + { + "epoch": 0.052390955146422835, + "grad_norm": 0.738818036033501, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5863415598869324, + "step": 106 + }, + { + "epoch": 0.052885209440257014, + "grad_norm": 0.7066380161278255, + "learning_rate": 1.0443349753694583e-05, + "loss": 0.5783145427703857, + "step": 107 + }, + { + "epoch": 0.05337946373409119, + "grad_norm": 0.6486663261886427, + "learning_rate": 1.0541871921182268e-05, + "loss": 0.5761469006538391, + "step": 108 + }, + { + "epoch": 0.053873718027925364, + "grad_norm": 0.7011826885785277, + "learning_rate": 1.0640394088669953e-05, + "loss": 0.5931205749511719, + "step": 109 + }, + { + "epoch": 0.05436797232175954, + "grad_norm": 0.6624296231637669, + "learning_rate": 1.0738916256157637e-05, + "loss": 0.5429986119270325, + "step": 110 + }, + { + "epoch": 0.05486222661559372, + "grad_norm": 0.758180242025479, + "learning_rate": 1.083743842364532e-05, + "loss": 0.5154455304145813, + "step": 111 + }, + { + "epoch": 0.0553564809094279, + "grad_norm": 
0.6631694030017043, + "learning_rate": 1.0935960591133005e-05, + "loss": 0.5465028285980225, + "step": 112 + }, + { + "epoch": 0.05585073520326208, + "grad_norm": 0.7234030186547562, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5973349213600159, + "step": 113 + }, + { + "epoch": 0.05634498949709626, + "grad_norm": 0.8062494007312124, + "learning_rate": 1.1133004926108375e-05, + "loss": 0.6201578378677368, + "step": 114 + }, + { + "epoch": 0.05683924379093044, + "grad_norm": 0.7754913697435033, + "learning_rate": 1.123152709359606e-05, + "loss": 0.5090143084526062, + "step": 115 + }, + { + "epoch": 0.05733349808476461, + "grad_norm": 0.7128751966577052, + "learning_rate": 1.1330049261083744e-05, + "loss": 0.5275869369506836, + "step": 116 + }, + { + "epoch": 0.05782775237859879, + "grad_norm": 0.6950533949454222, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.571302056312561, + "step": 117 + }, + { + "epoch": 0.058322006672432966, + "grad_norm": 0.727683614551879, + "learning_rate": 1.1527093596059114e-05, + "loss": 0.5920293927192688, + "step": 118 + }, + { + "epoch": 0.058816260966267145, + "grad_norm": 0.7151674344713859, + "learning_rate": 1.1625615763546799e-05, + "loss": 0.5877068042755127, + "step": 119 + }, + { + "epoch": 0.059310515260101324, + "grad_norm": 0.7467125629300125, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.6140042543411255, + "step": 120 + }, + { + "epoch": 0.0598047695539355, + "grad_norm": 0.7531213899377466, + "learning_rate": 1.182266009852217e-05, + "loss": 0.5642052292823792, + "step": 121 + }, + { + "epoch": 0.060299023847769674, + "grad_norm": 0.7258097143889621, + "learning_rate": 1.1921182266009855e-05, + "loss": 0.5535261034965515, + "step": 122 + }, + { + "epoch": 0.06079327814160385, + "grad_norm": 0.6906824437380253, + "learning_rate": 1.201970443349754e-05, + "loss": 0.5202849507331848, + "step": 123 + }, + { + "epoch": 0.06128753243543803, + "grad_norm": 0.7290752273219125, + "learning_rate": 
1.2118226600985224e-05, + "loss": 0.5626791715621948, + "step": 124 + }, + { + "epoch": 0.06178178672927221, + "grad_norm": 0.6770400510110369, + "learning_rate": 1.2216748768472909e-05, + "loss": 0.5416101217269897, + "step": 125 + }, + { + "epoch": 0.06227604102310639, + "grad_norm": 0.730080694043851, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.5683388710021973, + "step": 126 + }, + { + "epoch": 0.06277029531694056, + "grad_norm": 0.7617011668537459, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.564468264579773, + "step": 127 + }, + { + "epoch": 0.06326454961077474, + "grad_norm": 0.7085057216007719, + "learning_rate": 1.2512315270935961e-05, + "loss": 0.5419844388961792, + "step": 128 + }, + { + "epoch": 0.06375880390460892, + "grad_norm": 0.7653624040034734, + "learning_rate": 1.2610837438423646e-05, + "loss": 0.51283860206604, + "step": 129 + }, + { + "epoch": 0.0642530581984431, + "grad_norm": 0.8138449595397697, + "learning_rate": 1.2709359605911331e-05, + "loss": 0.5807296633720398, + "step": 130 + }, + { + "epoch": 0.06474731249227728, + "grad_norm": 0.6723079879875923, + "learning_rate": 1.2807881773399016e-05, + "loss": 0.5277815461158752, + "step": 131 + }, + { + "epoch": 0.06524156678611145, + "grad_norm": 0.6681532618442926, + "learning_rate": 1.29064039408867e-05, + "loss": 0.5044680833816528, + "step": 132 + }, + { + "epoch": 0.06573582107994563, + "grad_norm": 0.753382083900827, + "learning_rate": 1.3004926108374385e-05, + "loss": 0.5412886738777161, + "step": 133 + }, + { + "epoch": 0.06623007537377981, + "grad_norm": 0.7168767227212489, + "learning_rate": 1.310344827586207e-05, + "loss": 0.5314532518386841, + "step": 134 + }, + { + "epoch": 0.06672432966761399, + "grad_norm": 0.8393067756176276, + "learning_rate": 1.3201970443349755e-05, + "loss": 0.5544138550758362, + "step": 135 + }, + { + "epoch": 0.06721858396144817, + "grad_norm": 0.7720251101355328, + "learning_rate": 1.330049261083744e-05, + "loss": 0.5745705366134644, 
+ "step": 136 + }, + { + "epoch": 0.06771283825528235, + "grad_norm": 0.8433611027798503, + "learning_rate": 1.3399014778325124e-05, + "loss": 0.5361800789833069, + "step": 137 + }, + { + "epoch": 0.06820709254911653, + "grad_norm": 0.7945865329579561, + "learning_rate": 1.3497536945812807e-05, + "loss": 0.5878221392631531, + "step": 138 + }, + { + "epoch": 0.06870134684295069, + "grad_norm": 0.7847520309491554, + "learning_rate": 1.3596059113300492e-05, + "loss": 0.5952787399291992, + "step": 139 + }, + { + "epoch": 0.06919560113678487, + "grad_norm": 0.7556944357281568, + "learning_rate": 1.369458128078818e-05, + "loss": 0.5334340929985046, + "step": 140 + }, + { + "epoch": 0.06968985543061905, + "grad_norm": 0.7730405260844581, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.5297533273696899, + "step": 141 + }, + { + "epoch": 0.07018410972445323, + "grad_norm": 0.7838373123609123, + "learning_rate": 1.3891625615763548e-05, + "loss": 0.5388105511665344, + "step": 142 + }, + { + "epoch": 0.0706783640182874, + "grad_norm": 0.6827867428906486, + "learning_rate": 1.3990147783251233e-05, + "loss": 0.484375536441803, + "step": 143 + }, + { + "epoch": 0.07117261831212159, + "grad_norm": 0.7377838543831393, + "learning_rate": 1.4088669950738918e-05, + "loss": 0.5395358800888062, + "step": 144 + }, + { + "epoch": 0.07166687260595576, + "grad_norm": 0.7024037339686016, + "learning_rate": 1.4187192118226602e-05, + "loss": 0.501459538936615, + "step": 145 + }, + { + "epoch": 0.07216112689978994, + "grad_norm": 0.7544878056630825, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.5390491485595703, + "step": 146 + }, + { + "epoch": 0.07265538119362412, + "grad_norm": 0.7358581376182646, + "learning_rate": 1.4384236453201972e-05, + "loss": 0.505649745464325, + "step": 147 + }, + { + "epoch": 0.0731496354874583, + "grad_norm": 0.791834759029257, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.5155121684074402, + "step": 148 + }, + { + "epoch": 
0.07364388978129248, + "grad_norm": 0.9182625859668322, + "learning_rate": 1.4581280788177341e-05, + "loss": 0.5502114295959473, + "step": 149 + }, + { + "epoch": 0.07413814407512666, + "grad_norm": 0.7705513444985356, + "learning_rate": 1.4679802955665026e-05, + "loss": 0.5243497490882874, + "step": 150 + }, + { + "epoch": 0.07463239836896084, + "grad_norm": 0.7936247647794451, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.529721736907959, + "step": 151 + }, + { + "epoch": 0.075126652662795, + "grad_norm": 0.7493387955752852, + "learning_rate": 1.4876847290640396e-05, + "loss": 0.4721008241176605, + "step": 152 + }, + { + "epoch": 0.07562090695662918, + "grad_norm": 0.8448372107109295, + "learning_rate": 1.4975369458128079e-05, + "loss": 0.46029576659202576, + "step": 153 + }, + { + "epoch": 0.07611516125046336, + "grad_norm": 0.8666504632745452, + "learning_rate": 1.5073891625615764e-05, + "loss": 0.5151746273040771, + "step": 154 + }, + { + "epoch": 0.07660941554429754, + "grad_norm": 0.8234378506914858, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.4743254780769348, + "step": 155 + }, + { + "epoch": 0.07710366983813172, + "grad_norm": 0.7901189046711773, + "learning_rate": 1.5270935960591133e-05, + "loss": 0.5167561769485474, + "step": 156 + }, + { + "epoch": 0.0775979241319659, + "grad_norm": 0.7442599788530032, + "learning_rate": 1.536945812807882e-05, + "loss": 0.47482365369796753, + "step": 157 + }, + { + "epoch": 0.07809217842580007, + "grad_norm": 0.7472930500337165, + "learning_rate": 1.5467980295566506e-05, + "loss": 0.5088409781455994, + "step": 158 + }, + { + "epoch": 0.07858643271963425, + "grad_norm": 0.839637174922739, + "learning_rate": 1.556650246305419e-05, + "loss": 0.5264201164245605, + "step": 159 + }, + { + "epoch": 0.07908068701346843, + "grad_norm": 0.8043048232381864, + "learning_rate": 1.5665024630541875e-05, + "loss": 0.5475984811782837, + "step": 160 + }, + { + "epoch": 0.07957494130730261, + "grad_norm": 
0.813963733997232, + "learning_rate": 1.576354679802956e-05, + "loss": 0.5652282238006592, + "step": 161 + }, + { + "epoch": 0.08006919560113679, + "grad_norm": 0.8257458665080726, + "learning_rate": 1.586206896551724e-05, + "loss": 0.5179979801177979, + "step": 162 + }, + { + "epoch": 0.08056344989497097, + "grad_norm": 0.7453513460678786, + "learning_rate": 1.5960591133004928e-05, + "loss": 0.4966253638267517, + "step": 163 + }, + { + "epoch": 0.08105770418880515, + "grad_norm": 0.7400908854625781, + "learning_rate": 1.605911330049261e-05, + "loss": 0.5216315388679504, + "step": 164 + }, + { + "epoch": 0.08155195848263931, + "grad_norm": 0.7974617542166776, + "learning_rate": 1.6157635467980298e-05, + "loss": 0.495576411485672, + "step": 165 + }, + { + "epoch": 0.08204621277647349, + "grad_norm": 0.7828217496299378, + "learning_rate": 1.625615763546798e-05, + "loss": 0.5101697444915771, + "step": 166 + }, + { + "epoch": 0.08254046707030767, + "grad_norm": 0.7891722656265441, + "learning_rate": 1.6354679802955667e-05, + "loss": 0.5438036918640137, + "step": 167 + }, + { + "epoch": 0.08303472136414185, + "grad_norm": 0.8062908900423786, + "learning_rate": 1.645320197044335e-05, + "loss": 0.5043500661849976, + "step": 168 + }, + { + "epoch": 0.08352897565797603, + "grad_norm": 0.8893145421032131, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.5129355788230896, + "step": 169 + }, + { + "epoch": 0.0840232299518102, + "grad_norm": 0.8344265538652059, + "learning_rate": 1.665024630541872e-05, + "loss": 0.48643916845321655, + "step": 170 + }, + { + "epoch": 0.08451748424564438, + "grad_norm": 0.9138503767586129, + "learning_rate": 1.6748768472906406e-05, + "loss": 0.5300272703170776, + "step": 171 + }, + { + "epoch": 0.08501173853947856, + "grad_norm": 0.9819214205489949, + "learning_rate": 1.684729064039409e-05, + "loss": 0.5321004390716553, + "step": 172 + }, + { + "epoch": 0.08550599283331274, + "grad_norm": 0.9555025734347583, + "learning_rate": 
1.6945812807881776e-05, + "loss": 0.5066401958465576, + "step": 173 + }, + { + "epoch": 0.08600024712714692, + "grad_norm": 0.8139597552129452, + "learning_rate": 1.704433497536946e-05, + "loss": 0.48993563652038574, + "step": 174 + }, + { + "epoch": 0.0864945014209811, + "grad_norm": 0.8921248257221488, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.5468013882637024, + "step": 175 + }, + { + "epoch": 0.08698875571481528, + "grad_norm": 0.8277628260630481, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5081865191459656, + "step": 176 + }, + { + "epoch": 0.08748301000864946, + "grad_norm": 0.7727605442624492, + "learning_rate": 1.7339901477832515e-05, + "loss": 0.48374873399734497, + "step": 177 + }, + { + "epoch": 0.08797726430248362, + "grad_norm": 0.7716185332367417, + "learning_rate": 1.7438423645320198e-05, + "loss": 0.4929465651512146, + "step": 178 + }, + { + "epoch": 0.0884715185963178, + "grad_norm": 0.7369259534742475, + "learning_rate": 1.7536945812807884e-05, + "loss": 0.49666428565979004, + "step": 179 + }, + { + "epoch": 0.08896577289015198, + "grad_norm": 0.9095846029993176, + "learning_rate": 1.7635467980295567e-05, + "loss": 0.5705476403236389, + "step": 180 + }, + { + "epoch": 0.08946002718398616, + "grad_norm": 0.8153458294604309, + "learning_rate": 1.7733990147783254e-05, + "loss": 0.5466605424880981, + "step": 181 + }, + { + "epoch": 0.08995428147782034, + "grad_norm": 0.7908211366510465, + "learning_rate": 1.7832512315270937e-05, + "loss": 0.47837337851524353, + "step": 182 + }, + { + "epoch": 0.09044853577165451, + "grad_norm": 0.8050205335034676, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.5370041131973267, + "step": 183 + }, + { + "epoch": 0.0909427900654887, + "grad_norm": 0.8315453873696782, + "learning_rate": 1.8029556650246306e-05, + "loss": 0.540340006351471, + "step": 184 + }, + { + "epoch": 0.09143704435932287, + "grad_norm": 0.7864886396514408, + "learning_rate": 1.8128078817733993e-05, + "loss": 
0.5165396928787231, + "step": 185 + }, + { + "epoch": 0.09193129865315705, + "grad_norm": 1.0212742677335798, + "learning_rate": 1.8226600985221676e-05, + "loss": 0.5391616821289062, + "step": 186 + }, + { + "epoch": 0.09242555294699123, + "grad_norm": 0.8362655612683817, + "learning_rate": 1.8325123152709362e-05, + "loss": 0.472774475812912, + "step": 187 + }, + { + "epoch": 0.09291980724082541, + "grad_norm": 0.7994913228950927, + "learning_rate": 1.8423645320197045e-05, + "loss": 0.5079161524772644, + "step": 188 + }, + { + "epoch": 0.09341406153465959, + "grad_norm": 0.7908069143027292, + "learning_rate": 1.852216748768473e-05, + "loss": 0.4909520149230957, + "step": 189 + }, + { + "epoch": 0.09390831582849377, + "grad_norm": 0.8204263481704893, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.5214540362358093, + "step": 190 + }, + { + "epoch": 0.09440257012232794, + "grad_norm": 1.0097310819423937, + "learning_rate": 1.8719211822660098e-05, + "loss": 0.4820341467857361, + "step": 191 + }, + { + "epoch": 0.09489682441616211, + "grad_norm": 0.7986122947719724, + "learning_rate": 1.8817733990147784e-05, + "loss": 0.5094855427742004, + "step": 192 + }, + { + "epoch": 0.09539107870999629, + "grad_norm": 0.8104059351445748, + "learning_rate": 1.8916256157635468e-05, + "loss": 0.47840312123298645, + "step": 193 + }, + { + "epoch": 0.09588533300383047, + "grad_norm": 0.8556791067143968, + "learning_rate": 1.9014778325123154e-05, + "loss": 0.5368070602416992, + "step": 194 + }, + { + "epoch": 0.09637958729766465, + "grad_norm": 0.8413108625552047, + "learning_rate": 1.911330049261084e-05, + "loss": 0.493880033493042, + "step": 195 + }, + { + "epoch": 0.09687384159149882, + "grad_norm": 0.8344269563446816, + "learning_rate": 1.9211822660098524e-05, + "loss": 0.5052261352539062, + "step": 196 + }, + { + "epoch": 0.097368095885333, + "grad_norm": 0.8488100596559239, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4817495346069336, + "step": 197 + }, + { + 
"epoch": 0.09786235017916718, + "grad_norm": 0.8835550004433761, + "learning_rate": 1.9408866995073893e-05, + "loss": 0.530259370803833, + "step": 198 + }, + { + "epoch": 0.09835660447300136, + "grad_norm": 0.8634602606490965, + "learning_rate": 1.950738916256158e-05, + "loss": 0.4984540045261383, + "step": 199 + }, + { + "epoch": 0.09885085876683554, + "grad_norm": 0.8655848178642821, + "learning_rate": 1.9605911330049263e-05, + "loss": 0.5472708940505981, + "step": 200 + }, + { + "epoch": 0.09934511306066972, + "grad_norm": 0.8520053240792014, + "learning_rate": 1.970443349753695e-05, + "loss": 0.5394926071166992, + "step": 201 + }, + { + "epoch": 0.0998393673545039, + "grad_norm": 0.9089636816290306, + "learning_rate": 1.9802955665024632e-05, + "loss": 0.5299160480499268, + "step": 202 + }, + { + "epoch": 0.10033362164833808, + "grad_norm": 0.9396000630272938, + "learning_rate": 1.990147783251232e-05, + "loss": 0.506400465965271, + "step": 203 + }, + { + "epoch": 0.10082787594217225, + "grad_norm": 0.7711226267847403, + "learning_rate": 2e-05, + "loss": 0.47956231236457825, + "step": 204 + }, + { + "epoch": 0.10132213023600642, + "grad_norm": 0.8971065288988803, + "learning_rate": 1.9999996662071442e-05, + "loss": 0.48805660009384155, + "step": 205 + }, + { + "epoch": 0.1018163845298406, + "grad_norm": 0.8419748393313904, + "learning_rate": 1.9999986648287996e-05, + "loss": 0.46014025807380676, + "step": 206 + }, + { + "epoch": 0.10231063882367478, + "grad_norm": 0.8667704651728929, + "learning_rate": 1.9999969958656345e-05, + "loss": 0.4654610753059387, + "step": 207 + }, + { + "epoch": 0.10280489311750896, + "grad_norm": 0.8143872307343123, + "learning_rate": 1.999994659318763e-05, + "loss": 0.47037336230278015, + "step": 208 + }, + { + "epoch": 0.10329914741134313, + "grad_norm": 0.8238466130965688, + "learning_rate": 1.999991655189745e-05, + "loss": 0.4853154718875885, + "step": 209 + }, + { + "epoch": 0.10379340170517731, + "grad_norm": 0.8233043672230826, 
+ "learning_rate": 1.9999879834805865e-05, + "loss": 0.4918109178543091, + "step": 210 + }, + { + "epoch": 0.10428765599901149, + "grad_norm": 0.787297319281164, + "learning_rate": 1.999983644193738e-05, + "loss": 0.5136955380439758, + "step": 211 + }, + { + "epoch": 0.10478191029284567, + "grad_norm": 0.8895124065919626, + "learning_rate": 1.9999786373320972e-05, + "loss": 0.5145115852355957, + "step": 212 + }, + { + "epoch": 0.10527616458667985, + "grad_norm": 0.8153315460424436, + "learning_rate": 1.9999729628990058e-05, + "loss": 0.4624764025211334, + "step": 213 + }, + { + "epoch": 0.10577041888051403, + "grad_norm": 0.7949012412003572, + "learning_rate": 1.9999666208982518e-05, + "loss": 0.4599718749523163, + "step": 214 + }, + { + "epoch": 0.1062646731743482, + "grad_norm": 0.8110981138692489, + "learning_rate": 1.99995961133407e-05, + "loss": 0.4642864465713501, + "step": 215 + }, + { + "epoch": 0.10675892746818239, + "grad_norm": 0.7785663242974379, + "learning_rate": 1.9999519342111392e-05, + "loss": 0.4756677448749542, + "step": 216 + }, + { + "epoch": 0.10725318176201656, + "grad_norm": 0.8781173550322721, + "learning_rate": 1.9999435895345846e-05, + "loss": 0.4982803463935852, + "step": 217 + }, + { + "epoch": 0.10774743605585073, + "grad_norm": 0.905677346569408, + "learning_rate": 1.999934577309977e-05, + "loss": 0.5189295411109924, + "step": 218 + }, + { + "epoch": 0.10824169034968491, + "grad_norm": 0.8870093356565885, + "learning_rate": 1.999924897543333e-05, + "loss": 0.5077873468399048, + "step": 219 + }, + { + "epoch": 0.10873594464351909, + "grad_norm": 0.9164316488089079, + "learning_rate": 1.9999145502411148e-05, + "loss": 0.5510451793670654, + "step": 220 + }, + { + "epoch": 0.10923019893735327, + "grad_norm": 0.850640343977404, + "learning_rate": 1.9999035354102298e-05, + "loss": 0.44604551792144775, + "step": 221 + }, + { + "epoch": 0.10972445323118744, + "grad_norm": 0.7739778425864705, + "learning_rate": 1.9998918530580315e-05, + 
"loss": 0.42567160725593567, + "step": 222 + }, + { + "epoch": 0.11021870752502162, + "grad_norm": 0.8699648367810445, + "learning_rate": 1.9998795031923186e-05, + "loss": 0.4622190594673157, + "step": 223 + }, + { + "epoch": 0.1107129618188558, + "grad_norm": 0.8261943707290175, + "learning_rate": 1.999866485821336e-05, + "loss": 0.5023611783981323, + "step": 224 + }, + { + "epoch": 0.11120721611268998, + "grad_norm": 0.769204860463621, + "learning_rate": 1.9998528009537735e-05, + "loss": 0.451701819896698, + "step": 225 + }, + { + "epoch": 0.11170147040652416, + "grad_norm": 0.9053438794448195, + "learning_rate": 1.9998384485987675e-05, + "loss": 0.48493725061416626, + "step": 226 + }, + { + "epoch": 0.11219572470035834, + "grad_norm": 0.7780216873284675, + "learning_rate": 1.9998234287658996e-05, + "loss": 0.45377853512763977, + "step": 227 + }, + { + "epoch": 0.11268997899419252, + "grad_norm": 0.9129521331875277, + "learning_rate": 1.9998077414651957e-05, + "loss": 0.48963701725006104, + "step": 228 + }, + { + "epoch": 0.1131842332880267, + "grad_norm": 0.8500208947168179, + "learning_rate": 1.9997913867071296e-05, + "loss": 0.47935402393341064, + "step": 229 + }, + { + "epoch": 0.11367848758186087, + "grad_norm": 0.8984825507205957, + "learning_rate": 1.999774364502619e-05, + "loss": 0.46203523874282837, + "step": 230 + }, + { + "epoch": 0.11417274187569504, + "grad_norm": 0.8695917880315948, + "learning_rate": 1.9997566748630274e-05, + "loss": 0.4411412179470062, + "step": 231 + }, + { + "epoch": 0.11466699616952922, + "grad_norm": 0.9063292151670944, + "learning_rate": 1.9997383178001646e-05, + "loss": 0.44424787163734436, + "step": 232 + }, + { + "epoch": 0.1151612504633634, + "grad_norm": 0.9239108187837685, + "learning_rate": 1.9997192933262853e-05, + "loss": 0.4862042963504791, + "step": 233 + }, + { + "epoch": 0.11565550475719757, + "grad_norm": 0.9583721120887143, + "learning_rate": 1.99969960145409e-05, + "loss": 0.49599340558052063, + "step": 234 + 
}, + { + "epoch": 0.11614975905103175, + "grad_norm": 0.8373453660412895, + "learning_rate": 1.999679242196725e-05, + "loss": 0.49702027440071106, + "step": 235 + }, + { + "epoch": 0.11664401334486593, + "grad_norm": 0.9122480348696357, + "learning_rate": 1.9996582155677813e-05, + "loss": 0.520037829875946, + "step": 236 + }, + { + "epoch": 0.11713826763870011, + "grad_norm": 0.8765545420336399, + "learning_rate": 1.999636521581296e-05, + "loss": 0.4571160674095154, + "step": 237 + }, + { + "epoch": 0.11763252193253429, + "grad_norm": 0.8739431997449725, + "learning_rate": 1.9996141602517526e-05, + "loss": 0.45602840185165405, + "step": 238 + }, + { + "epoch": 0.11812677622636847, + "grad_norm": 0.8737753030098584, + "learning_rate": 1.999591131594078e-05, + "loss": 0.4909728169441223, + "step": 239 + }, + { + "epoch": 0.11862103052020265, + "grad_norm": 0.9637438681008479, + "learning_rate": 1.9995674356236468e-05, + "loss": 0.47716090083122253, + "step": 240 + }, + { + "epoch": 0.11911528481403683, + "grad_norm": 0.8781513787464966, + "learning_rate": 1.9995430723562774e-05, + "loss": 0.4449527859687805, + "step": 241 + }, + { + "epoch": 0.119609539107871, + "grad_norm": 0.9278951723441426, + "learning_rate": 1.9995180418082347e-05, + "loss": 0.49069035053253174, + "step": 242 + }, + { + "epoch": 0.12010379340170518, + "grad_norm": 0.8082383806465664, + "learning_rate": 1.9994923439962286e-05, + "loss": 0.506738543510437, + "step": 243 + }, + { + "epoch": 0.12059804769553935, + "grad_norm": 0.7256243644120642, + "learning_rate": 1.9994659789374145e-05, + "loss": 0.38516658544540405, + "step": 244 + }, + { + "epoch": 0.12109230198937353, + "grad_norm": 1.120005864402108, + "learning_rate": 1.9994389466493942e-05, + "loss": 0.49539780616760254, + "step": 245 + }, + { + "epoch": 0.1215865562832077, + "grad_norm": 0.8099291045850996, + "learning_rate": 1.999411247150213e-05, + "loss": 0.4400706887245178, + "step": 246 + }, + { + "epoch": 0.12208081057704188, + 
"grad_norm": 1.033732324753182, + "learning_rate": 1.9993828804583625e-05, + "loss": 0.48815736174583435, + "step": 247 + }, + { + "epoch": 0.12257506487087606, + "grad_norm": 0.8506340248073136, + "learning_rate": 1.999353846592781e-05, + "loss": 0.42744773626327515, + "step": 248 + }, + { + "epoch": 0.12306931916471024, + "grad_norm": 0.8847437809130215, + "learning_rate": 1.9993241455728505e-05, + "loss": 0.4370969235897064, + "step": 249 + }, + { + "epoch": 0.12356357345854442, + "grad_norm": 0.8643380888364789, + "learning_rate": 1.9992937774183988e-05, + "loss": 0.4803960621356964, + "step": 250 + }, + { + "epoch": 0.1240578277523786, + "grad_norm": 0.8986867692232635, + "learning_rate": 1.9992627421496994e-05, + "loss": 0.4614640474319458, + "step": 251 + }, + { + "epoch": 0.12455208204621278, + "grad_norm": 0.819634526245566, + "learning_rate": 1.9992310397874715e-05, + "loss": 0.46626490354537964, + "step": 252 + }, + { + "epoch": 0.12504633634004694, + "grad_norm": 0.8614062439986471, + "learning_rate": 1.9991986703528784e-05, + "loss": 0.4812886416912079, + "step": 253 + }, + { + "epoch": 0.12554059063388112, + "grad_norm": 0.782352455662906, + "learning_rate": 1.99916563386753e-05, + "loss": 0.45037686824798584, + "step": 254 + }, + { + "epoch": 0.1260348449277153, + "grad_norm": 0.8735972282090627, + "learning_rate": 1.9991319303534804e-05, + "loss": 0.48492124676704407, + "step": 255 + }, + { + "epoch": 0.12652909922154948, + "grad_norm": 0.9123971905878313, + "learning_rate": 1.9990975598332304e-05, + "loss": 0.48825496435165405, + "step": 256 + }, + { + "epoch": 0.12702335351538366, + "grad_norm": 0.9350748088966393, + "learning_rate": 1.9990625223297244e-05, + "loss": 0.4836634695529938, + "step": 257 + }, + { + "epoch": 0.12751760780921784, + "grad_norm": 0.8091067369882244, + "learning_rate": 1.9990268178663538e-05, + "loss": 0.4632943272590637, + "step": 258 + }, + { + "epoch": 0.12801186210305202, + "grad_norm": 0.8933963237824735, + 
"learning_rate": 1.9989904464669533e-05, + "loss": 0.4601137042045593, + "step": 259 + }, + { + "epoch": 0.1285061163968862, + "grad_norm": 0.956219889400008, + "learning_rate": 1.998953408155805e-05, + "loss": 0.4390139579772949, + "step": 260 + }, + { + "epoch": 0.12900037069072037, + "grad_norm": 0.8209256250218969, + "learning_rate": 1.9989157029576348e-05, + "loss": 0.45749080181121826, + "step": 261 + }, + { + "epoch": 0.12949462498455455, + "grad_norm": 0.8687280720196128, + "learning_rate": 1.998877330897614e-05, + "loss": 0.4490616023540497, + "step": 262 + }, + { + "epoch": 0.12998887927838873, + "grad_norm": 0.8048623785766325, + "learning_rate": 1.998838292001359e-05, + "loss": 0.4819987714290619, + "step": 263 + }, + { + "epoch": 0.1304831335722229, + "grad_norm": 0.8512266303867803, + "learning_rate": 1.9987985862949325e-05, + "loss": 0.4448384940624237, + "step": 264 + }, + { + "epoch": 0.1309773878660571, + "grad_norm": 0.8699526878628875, + "learning_rate": 1.9987582138048405e-05, + "loss": 0.4574149549007416, + "step": 265 + }, + { + "epoch": 0.13147164215989127, + "grad_norm": 0.8239086741829158, + "learning_rate": 1.9987171745580353e-05, + "loss": 0.4765186607837677, + "step": 266 + }, + { + "epoch": 0.13196589645372545, + "grad_norm": 0.8859727328667625, + "learning_rate": 1.998675468581915e-05, + "loss": 0.4900081753730774, + "step": 267 + }, + { + "epoch": 0.13246015074755962, + "grad_norm": 0.8200731674424109, + "learning_rate": 1.9986330959043206e-05, + "loss": 0.433933287858963, + "step": 268 + }, + { + "epoch": 0.1329544050413938, + "grad_norm": 0.8424887851968712, + "learning_rate": 1.9985900565535403e-05, + "loss": 0.452491819858551, + "step": 269 + }, + { + "epoch": 0.13344865933522798, + "grad_norm": 0.8454499255279871, + "learning_rate": 1.9985463505583062e-05, + "loss": 0.4583294987678528, + "step": 270 + }, + { + "epoch": 0.13394291362906216, + "grad_norm": 0.7993545503780815, + "learning_rate": 1.9985019779477958e-05, + "loss": 
0.43183961510658264, + "step": 271 + }, + { + "epoch": 0.13443716792289634, + "grad_norm": 0.8548370246393396, + "learning_rate": 1.998456938751632e-05, + "loss": 0.48075324296951294, + "step": 272 + }, + { + "epoch": 0.13493142221673052, + "grad_norm": 0.9002412472414919, + "learning_rate": 1.9984112329998825e-05, + "loss": 0.5131007432937622, + "step": 273 + }, + { + "epoch": 0.1354256765105647, + "grad_norm": 0.9730858409317547, + "learning_rate": 1.998364860723059e-05, + "loss": 0.4841446876525879, + "step": 274 + }, + { + "epoch": 0.13591993080439888, + "grad_norm": 0.845168898875427, + "learning_rate": 1.9983178219521194e-05, + "loss": 0.5001078248023987, + "step": 275 + }, + { + "epoch": 0.13641418509823305, + "grad_norm": 0.9216453803321015, + "learning_rate": 1.998270116718466e-05, + "loss": 0.44851893186569214, + "step": 276 + }, + { + "epoch": 0.1369084393920672, + "grad_norm": 0.8496437780068066, + "learning_rate": 1.9982217450539464e-05, + "loss": 0.4635714888572693, + "step": 277 + }, + { + "epoch": 0.13740269368590138, + "grad_norm": 0.8697167139912243, + "learning_rate": 1.9981727069908525e-05, + "loss": 0.4171838164329529, + "step": 278 + }, + { + "epoch": 0.13789694797973556, + "grad_norm": 0.9173222191020198, + "learning_rate": 1.9981230025619216e-05, + "loss": 0.4819942116737366, + "step": 279 + }, + { + "epoch": 0.13839120227356974, + "grad_norm": 0.965585018194969, + "learning_rate": 1.998072631800336e-05, + "loss": 0.47878971695899963, + "step": 280 + }, + { + "epoch": 0.13888545656740392, + "grad_norm": 0.8354999533998939, + "learning_rate": 1.9980215947397217e-05, + "loss": 0.4436519145965576, + "step": 281 + }, + { + "epoch": 0.1393797108612381, + "grad_norm": 0.9615471937507843, + "learning_rate": 1.9979698914141507e-05, + "loss": 0.4633050262928009, + "step": 282 + }, + { + "epoch": 0.13987396515507228, + "grad_norm": 0.8419828093645744, + "learning_rate": 1.9979175218581397e-05, + "loss": 0.4264826774597168, + "step": 283 + }, + { + 
"epoch": 0.14036821944890646, + "grad_norm": 0.9397240311894202, + "learning_rate": 1.9978644861066493e-05, + "loss": 0.47763916850090027, + "step": 284 + }, + { + "epoch": 0.14086247374274063, + "grad_norm": 0.9621046785661004, + "learning_rate": 1.997810784195086e-05, + "loss": 0.44895434379577637, + "step": 285 + }, + { + "epoch": 0.1413567280365748, + "grad_norm": 0.9045420673708359, + "learning_rate": 1.9977564161593e-05, + "loss": 0.4287600517272949, + "step": 286 + }, + { + "epoch": 0.141850982330409, + "grad_norm": 0.9070406248365095, + "learning_rate": 1.997701382035587e-05, + "loss": 0.44175297021865845, + "step": 287 + }, + { + "epoch": 0.14234523662424317, + "grad_norm": 0.9409958894859969, + "learning_rate": 1.9976456818606868e-05, + "loss": 0.4393232464790344, + "step": 288 + }, + { + "epoch": 0.14283949091807735, + "grad_norm": 0.9574764348211552, + "learning_rate": 1.9975893156717836e-05, + "loss": 0.4600023329257965, + "step": 289 + }, + { + "epoch": 0.14333374521191153, + "grad_norm": 0.9582932704552442, + "learning_rate": 1.9975322835065075e-05, + "loss": 0.4819300174713135, + "step": 290 + }, + { + "epoch": 0.1438279995057457, + "grad_norm": 0.8798665685233671, + "learning_rate": 1.9974745854029318e-05, + "loss": 0.4391498267650604, + "step": 291 + }, + { + "epoch": 0.14432225379957989, + "grad_norm": 0.8278978827145046, + "learning_rate": 1.9974162213995748e-05, + "loss": 0.43435904383659363, + "step": 292 + }, + { + "epoch": 0.14481650809341406, + "grad_norm": 0.8555919001416697, + "learning_rate": 1.9973571915354e-05, + "loss": 0.43575727939605713, + "step": 293 + }, + { + "epoch": 0.14531076238724824, + "grad_norm": 0.847472972308698, + "learning_rate": 1.9972974958498145e-05, + "loss": 0.39998459815979004, + "step": 294 + }, + { + "epoch": 0.14580501668108242, + "grad_norm": 0.9068432330089449, + "learning_rate": 1.9972371343826705e-05, + "loss": 0.4620361030101776, + "step": 295 + }, + { + "epoch": 0.1462992709749166, + "grad_norm": 
0.9496965104492539, + "learning_rate": 1.9971761071742644e-05, + "loss": 0.5172264575958252, + "step": 296 + }, + { + "epoch": 0.14679352526875078, + "grad_norm": 0.9234160870013586, + "learning_rate": 1.997114414265337e-05, + "loss": 0.4685489535331726, + "step": 297 + }, + { + "epoch": 0.14728777956258496, + "grad_norm": 0.8830728533856737, + "learning_rate": 1.9970520556970735e-05, + "loss": 0.4346499741077423, + "step": 298 + }, + { + "epoch": 0.14778203385641914, + "grad_norm": 0.8462127222831192, + "learning_rate": 1.996989031511104e-05, + "loss": 0.4051141142845154, + "step": 299 + }, + { + "epoch": 0.14827628815025332, + "grad_norm": 1.5751283315817302, + "learning_rate": 1.996925341749502e-05, + "loss": 0.4862591028213501, + "step": 300 + }, + { + "epoch": 0.1487705424440875, + "grad_norm": 0.9475006076143342, + "learning_rate": 1.996860986454787e-05, + "loss": 0.44075754284858704, + "step": 301 + }, + { + "epoch": 0.14926479673792167, + "grad_norm": 0.8707373783945862, + "learning_rate": 1.99679596566992e-05, + "loss": 0.44321805238723755, + "step": 302 + }, + { + "epoch": 0.14975905103175585, + "grad_norm": 0.8195768056986794, + "learning_rate": 1.996730279438309e-05, + "loss": 0.4468157887458801, + "step": 303 + }, + { + "epoch": 0.15025330532559, + "grad_norm": 0.9918503423974457, + "learning_rate": 1.996663927803805e-05, + "loss": 0.48698270320892334, + "step": 304 + }, + { + "epoch": 0.15074755961942418, + "grad_norm": 0.9116215117394889, + "learning_rate": 1.9965969108107032e-05, + "loss": 0.41898253560066223, + "step": 305 + }, + { + "epoch": 0.15124181391325836, + "grad_norm": 0.9221438157249551, + "learning_rate": 1.9965292285037437e-05, + "loss": 0.4827130436897278, + "step": 306 + }, + { + "epoch": 0.15173606820709254, + "grad_norm": 0.8314057300557679, + "learning_rate": 1.99646088092811e-05, + "loss": 0.4219037592411041, + "step": 307 + }, + { + "epoch": 0.15223032250092672, + "grad_norm": 0.8392045773293594, + "learning_rate": 
1.9963918681294298e-05, + "loss": 0.4431123733520508, + "step": 308 + }, + { + "epoch": 0.1527245767947609, + "grad_norm": 0.8500815118931239, + "learning_rate": 1.996322190153775e-05, + "loss": 0.4161941409111023, + "step": 309 + }, + { + "epoch": 0.15321883108859508, + "grad_norm": 0.9107651666369411, + "learning_rate": 1.9962518470476617e-05, + "loss": 0.4774768650531769, + "step": 310 + }, + { + "epoch": 0.15371308538242925, + "grad_norm": 0.8037347887475985, + "learning_rate": 1.9961808388580503e-05, + "loss": 0.4196036159992218, + "step": 311 + }, + { + "epoch": 0.15420733967626343, + "grad_norm": 1.0067362464519019, + "learning_rate": 1.996109165632344e-05, + "loss": 0.44241398572921753, + "step": 312 + }, + { + "epoch": 0.1547015939700976, + "grad_norm": 0.888150506782497, + "learning_rate": 1.996036827418392e-05, + "loss": 0.47662627696990967, + "step": 313 + }, + { + "epoch": 0.1551958482639318, + "grad_norm": 0.8458159023673953, + "learning_rate": 1.9959638242644855e-05, + "loss": 0.4241487979888916, + "step": 314 + }, + { + "epoch": 0.15569010255776597, + "grad_norm": 0.9355978957071136, + "learning_rate": 1.9958901562193605e-05, + "loss": 0.45686113834381104, + "step": 315 + }, + { + "epoch": 0.15618435685160015, + "grad_norm": 0.944155507976385, + "learning_rate": 1.9958158233321968e-05, + "loss": 0.4154825806617737, + "step": 316 + }, + { + "epoch": 0.15667861114543433, + "grad_norm": 0.9827195710672626, + "learning_rate": 1.9957408256526176e-05, + "loss": 0.4705435037612915, + "step": 317 + }, + { + "epoch": 0.1571728654392685, + "grad_norm": 0.9880074034620054, + "learning_rate": 1.9956651632306908e-05, + "loss": 0.4367898404598236, + "step": 318 + }, + { + "epoch": 0.15766711973310268, + "grad_norm": 0.9294773909083144, + "learning_rate": 1.9955888361169272e-05, + "loss": 0.4668901264667511, + "step": 319 + }, + { + "epoch": 0.15816137402693686, + "grad_norm": 0.9543525396859661, + "learning_rate": 1.995511844362282e-05, + "loss": 
0.46429356932640076, + "step": 320 + }, + { + "epoch": 0.15865562832077104, + "grad_norm": 0.9206239653453478, + "learning_rate": 1.9954341880181536e-05, + "loss": 0.4582952857017517, + "step": 321 + }, + { + "epoch": 0.15914988261460522, + "grad_norm": 0.9460762127599929, + "learning_rate": 1.9953558671363843e-05, + "loss": 0.45110762119293213, + "step": 322 + }, + { + "epoch": 0.1596441369084394, + "grad_norm": 0.9441078381056233, + "learning_rate": 1.99527688176926e-05, + "loss": 0.4049065113067627, + "step": 323 + }, + { + "epoch": 0.16013839120227358, + "grad_norm": 0.8033040053333058, + "learning_rate": 1.9951972319695105e-05, + "loss": 0.40884825587272644, + "step": 324 + }, + { + "epoch": 0.16063264549610776, + "grad_norm": 0.902465277703788, + "learning_rate": 1.9951169177903084e-05, + "loss": 0.4416786730289459, + "step": 325 + }, + { + "epoch": 0.16112689978994194, + "grad_norm": 0.8396124025463547, + "learning_rate": 1.9950359392852704e-05, + "loss": 0.4318765103816986, + "step": 326 + }, + { + "epoch": 0.16162115408377611, + "grad_norm": 0.9197188335811614, + "learning_rate": 1.9949542965084564e-05, + "loss": 0.4415965974330902, + "step": 327 + }, + { + "epoch": 0.1621154083776103, + "grad_norm": 0.9816748337776936, + "learning_rate": 1.9948719895143703e-05, + "loss": 0.4816298186779022, + "step": 328 + }, + { + "epoch": 0.16260966267144447, + "grad_norm": 0.8960734361029558, + "learning_rate": 1.9947890183579594e-05, + "loss": 0.4329088032245636, + "step": 329 + }, + { + "epoch": 0.16310391696527862, + "grad_norm": 0.9960918612087606, + "learning_rate": 1.9947053830946134e-05, + "loss": 0.43193015456199646, + "step": 330 + }, + { + "epoch": 0.1635981712591128, + "grad_norm": 0.9310501291263382, + "learning_rate": 1.994621083780166e-05, + "loss": 0.48738086223602295, + "step": 331 + }, + { + "epoch": 0.16409242555294698, + "grad_norm": 0.9523291617618251, + "learning_rate": 1.9945361204708948e-05, + "loss": 0.4707815647125244, + "step": 332 + }, + { + 
"epoch": 0.16458667984678116, + "grad_norm": 0.8438149141988297, + "learning_rate": 1.9944504932235198e-05, + "loss": 0.4190637469291687, + "step": 333 + }, + { + "epoch": 0.16508093414061534, + "grad_norm": 0.9348901251563362, + "learning_rate": 1.9943642020952042e-05, + "loss": 0.45955735445022583, + "step": 334 + }, + { + "epoch": 0.16557518843444952, + "grad_norm": 0.9334033255095994, + "learning_rate": 1.9942772471435555e-05, + "loss": 0.4675702750682831, + "step": 335 + }, + { + "epoch": 0.1660694427282837, + "grad_norm": 0.9694338385909206, + "learning_rate": 1.9941896284266224e-05, + "loss": 0.42571327090263367, + "step": 336 + }, + { + "epoch": 0.16656369702211787, + "grad_norm": 0.827954024094364, + "learning_rate": 1.994101346002899e-05, + "loss": 0.4341443181037903, + "step": 337 + }, + { + "epoch": 0.16705795131595205, + "grad_norm": 0.9227161087353433, + "learning_rate": 1.9940123999313214e-05, + "loss": 0.4473035931587219, + "step": 338 + }, + { + "epoch": 0.16755220560978623, + "grad_norm": 0.9514215023205275, + "learning_rate": 1.9939227902712676e-05, + "loss": 0.4692152142524719, + "step": 339 + }, + { + "epoch": 0.1680464599036204, + "grad_norm": 0.902462533797338, + "learning_rate": 1.9938325170825607e-05, + "loss": 0.4169067442417145, + "step": 340 + }, + { + "epoch": 0.1685407141974546, + "grad_norm": 0.8958693793994358, + "learning_rate": 1.9937415804254657e-05, + "loss": 0.451092928647995, + "step": 341 + }, + { + "epoch": 0.16903496849128877, + "grad_norm": 0.9439820250269497, + "learning_rate": 1.99364998036069e-05, + "loss": 0.39640212059020996, + "step": 342 + }, + { + "epoch": 0.16952922278512295, + "grad_norm": 0.9953253959869931, + "learning_rate": 1.9935577169493854e-05, + "loss": 0.46396374702453613, + "step": 343 + }, + { + "epoch": 0.17002347707895712, + "grad_norm": 0.940542166338043, + "learning_rate": 1.9934647902531453e-05, + "loss": 0.4343748390674591, + "step": 344 + }, + { + "epoch": 0.1705177313727913, + "grad_norm": 
0.8926095624124082, + "learning_rate": 1.9933712003340056e-05, + "loss": 0.4353589713573456, + "step": 345 + }, + { + "epoch": 0.17101198566662548, + "grad_norm": 0.981244679678695, + "learning_rate": 1.9932769472544464e-05, + "loss": 0.4423677623271942, + "step": 346 + }, + { + "epoch": 0.17150623996045966, + "grad_norm": 0.9632090771111401, + "learning_rate": 1.9931820310773894e-05, + "loss": 0.4382045865058899, + "step": 347 + }, + { + "epoch": 0.17200049425429384, + "grad_norm": 0.9042153187184925, + "learning_rate": 1.993086451866199e-05, + "loss": 0.3966183066368103, + "step": 348 + }, + { + "epoch": 0.17249474854812802, + "grad_norm": 0.9998736444681166, + "learning_rate": 1.9929902096846833e-05, + "loss": 0.48624011874198914, + "step": 349 + }, + { + "epoch": 0.1729890028419622, + "grad_norm": 0.9399569652966117, + "learning_rate": 1.9928933045970913e-05, + "loss": 0.4442569315433502, + "step": 350 + }, + { + "epoch": 0.17348325713579638, + "grad_norm": 0.9204808269523502, + "learning_rate": 1.992795736668116e-05, + "loss": 0.42499929666519165, + "step": 351 + }, + { + "epoch": 0.17397751142963055, + "grad_norm": 0.9507435140290256, + "learning_rate": 1.9926975059628923e-05, + "loss": 0.4230741858482361, + "step": 352 + }, + { + "epoch": 0.17447176572346473, + "grad_norm": 0.9092303670359448, + "learning_rate": 1.9925986125469974e-05, + "loss": 0.4273882806301117, + "step": 353 + }, + { + "epoch": 0.1749660200172989, + "grad_norm": 0.9603670891238569, + "learning_rate": 1.9924990564864513e-05, + "loss": 0.45237618684768677, + "step": 354 + }, + { + "epoch": 0.1754602743111331, + "grad_norm": 0.8737901526941092, + "learning_rate": 1.9923988378477165e-05, + "loss": 0.4115524888038635, + "step": 355 + }, + { + "epoch": 0.17595452860496724, + "grad_norm": 0.8886450314145863, + "learning_rate": 1.9922979566976968e-05, + "loss": 0.4476633071899414, + "step": 356 + }, + { + "epoch": 0.17644878289880142, + "grad_norm": 1.155944411883778, + "learning_rate": 
1.9921964131037398e-05, + "loss": 0.44930100440979004, + "step": 357 + }, + { + "epoch": 0.1769430371926356, + "grad_norm": 1.0356351975379994, + "learning_rate": 1.9920942071336338e-05, + "loss": 0.4714374244213104, + "step": 358 + }, + { + "epoch": 0.17743729148646978, + "grad_norm": 0.9469405731486913, + "learning_rate": 1.9919913388556105e-05, + "loss": 0.47696003317832947, + "step": 359 + }, + { + "epoch": 0.17793154578030396, + "grad_norm": 0.9021123492009391, + "learning_rate": 1.9918878083383434e-05, + "loss": 0.44937074184417725, + "step": 360 + }, + { + "epoch": 0.17842580007413814, + "grad_norm": 0.9771832594876818, + "learning_rate": 1.9917836156509472e-05, + "loss": 0.44937658309936523, + "step": 361 + }, + { + "epoch": 0.17892005436797231, + "grad_norm": 0.8240548100976023, + "learning_rate": 1.9916787608629805e-05, + "loss": 0.42068418860435486, + "step": 362 + }, + { + "epoch": 0.1794143086618065, + "grad_norm": 0.9112160927316303, + "learning_rate": 1.9915732440444428e-05, + "loss": 0.3791036605834961, + "step": 363 + }, + { + "epoch": 0.17990856295564067, + "grad_norm": 0.8982890263422821, + "learning_rate": 1.991467065265775e-05, + "loss": 0.401694118976593, + "step": 364 + }, + { + "epoch": 0.18040281724947485, + "grad_norm": 0.9743587318559909, + "learning_rate": 1.9913602245978602e-05, + "loss": 0.44095057249069214, + "step": 365 + }, + { + "epoch": 0.18089707154330903, + "grad_norm": 1.0125028049881057, + "learning_rate": 1.9912527221120248e-05, + "loss": 0.435880184173584, + "step": 366 + }, + { + "epoch": 0.1813913258371432, + "grad_norm": 0.9329716691545672, + "learning_rate": 1.991144557880035e-05, + "loss": 0.4147350490093231, + "step": 367 + }, + { + "epoch": 0.1818855801309774, + "grad_norm": 1.0077861725089856, + "learning_rate": 1.9910357319741006e-05, + "loss": 0.4191502630710602, + "step": 368 + }, + { + "epoch": 0.18237983442481157, + "grad_norm": 0.9334667001994715, + "learning_rate": 1.9909262444668715e-05, + "loss": 
0.41988956928253174, + "step": 369 + }, + { + "epoch": 0.18287408871864574, + "grad_norm": 1.0279430559635638, + "learning_rate": 1.99081609543144e-05, + "loss": 0.47451251745224, + "step": 370 + }, + { + "epoch": 0.18336834301247992, + "grad_norm": 0.9591522165165333, + "learning_rate": 1.9907052849413408e-05, + "loss": 0.44665899872779846, + "step": 371 + }, + { + "epoch": 0.1838625973063141, + "grad_norm": 1.0147189696208934, + "learning_rate": 1.990593813070548e-05, + "loss": 0.40575331449508667, + "step": 372 + }, + { + "epoch": 0.18435685160014828, + "grad_norm": 0.869456919545876, + "learning_rate": 1.99048167989348e-05, + "loss": 0.40580621361732483, + "step": 373 + }, + { + "epoch": 0.18485110589398246, + "grad_norm": 0.9514367145479501, + "learning_rate": 1.9903688854849948e-05, + "loss": 0.461843878030777, + "step": 374 + }, + { + "epoch": 0.18534536018781664, + "grad_norm": 0.9237949473924573, + "learning_rate": 1.990255429920392e-05, + "loss": 0.38992881774902344, + "step": 375 + }, + { + "epoch": 0.18583961448165082, + "grad_norm": 0.8831901142276523, + "learning_rate": 1.9901413132754133e-05, + "loss": 0.4288073480129242, + "step": 376 + }, + { + "epoch": 0.186333868775485, + "grad_norm": 0.9233387492673684, + "learning_rate": 1.9900265356262418e-05, + "loss": 0.4376278221607208, + "step": 377 + }, + { + "epoch": 0.18682812306931917, + "grad_norm": 1.0362403856880367, + "learning_rate": 1.9899110970495e-05, + "loss": 0.4127569794654846, + "step": 378 + }, + { + "epoch": 0.18732237736315335, + "grad_norm": 0.9507974239376735, + "learning_rate": 1.9897949976222543e-05, + "loss": 0.4221431016921997, + "step": 379 + }, + { + "epoch": 0.18781663165698753, + "grad_norm": 0.9433678538632697, + "learning_rate": 1.9896782374220108e-05, + "loss": 0.3540682792663574, + "step": 380 + }, + { + "epoch": 0.1883108859508217, + "grad_norm": 0.9261378158924178, + "learning_rate": 1.9895608165267165e-05, + "loss": 0.3746468424797058, + "step": 381 + }, + { + "epoch": 
0.1888051402446559, + "grad_norm": 0.885989840984364, + "learning_rate": 1.9894427350147602e-05, + "loss": 0.44986462593078613, + "step": 382 + }, + { + "epoch": 0.18929939453849004, + "grad_norm": 0.990953109983041, + "learning_rate": 1.9893239929649716e-05, + "loss": 0.38902726769447327, + "step": 383 + }, + { + "epoch": 0.18979364883232422, + "grad_norm": 0.9780134618767543, + "learning_rate": 1.9892045904566212e-05, + "loss": 0.43202030658721924, + "step": 384 + }, + { + "epoch": 0.1902879031261584, + "grad_norm": 0.9892650612917288, + "learning_rate": 1.9890845275694197e-05, + "loss": 0.3984760344028473, + "step": 385 + }, + { + "epoch": 0.19078215741999258, + "grad_norm": 0.9818585745680383, + "learning_rate": 1.9889638043835203e-05, + "loss": 0.41927874088287354, + "step": 386 + }, + { + "epoch": 0.19127641171382676, + "grad_norm": 0.8767703705433573, + "learning_rate": 1.9888424209795153e-05, + "loss": 0.3809741735458374, + "step": 387 + }, + { + "epoch": 0.19177066600766093, + "grad_norm": 0.9482820311569345, + "learning_rate": 1.988720377438439e-05, + "loss": 0.4237920045852661, + "step": 388 + }, + { + "epoch": 0.1922649203014951, + "grad_norm": 1.0327070863618417, + "learning_rate": 1.9885976738417662e-05, + "loss": 0.4065277576446533, + "step": 389 + }, + { + "epoch": 0.1927591745953293, + "grad_norm": 0.9237977569787911, + "learning_rate": 1.9884743102714116e-05, + "loss": 0.41154375672340393, + "step": 390 + }, + { + "epoch": 0.19325342888916347, + "grad_norm": 1.2326124039761357, + "learning_rate": 1.9883502868097304e-05, + "loss": 0.46544453501701355, + "step": 391 + }, + { + "epoch": 0.19374768318299765, + "grad_norm": 0.9587510645484782, + "learning_rate": 1.9882256035395204e-05, + "loss": 0.41279950737953186, + "step": 392 + }, + { + "epoch": 0.19424193747683183, + "grad_norm": 0.861022204519604, + "learning_rate": 1.988100260544017e-05, + "loss": 0.40083667635917664, + "step": 393 + }, + { + "epoch": 0.194736191770666, + "grad_norm": 
0.8790820180214292, + "learning_rate": 1.9879742579068976e-05, + "loss": 0.40041595697402954, + "step": 394 + }, + { + "epoch": 0.19523044606450018, + "grad_norm": 1.0258873082657662, + "learning_rate": 1.9878475957122803e-05, + "loss": 0.45317894220352173, + "step": 395 + }, + { + "epoch": 0.19572470035833436, + "grad_norm": 0.9348755525455025, + "learning_rate": 1.987720274044723e-05, + "loss": 0.4163329005241394, + "step": 396 + }, + { + "epoch": 0.19621895465216854, + "grad_norm": 0.9706842353465618, + "learning_rate": 1.9875922929892235e-05, + "loss": 0.4252028167247772, + "step": 397 + }, + { + "epoch": 0.19671320894600272, + "grad_norm": 0.9127590943033566, + "learning_rate": 1.9874636526312202e-05, + "loss": 0.40558624267578125, + "step": 398 + }, + { + "epoch": 0.1972074632398369, + "grad_norm": 0.9762994418484081, + "learning_rate": 1.9873343530565913e-05, + "loss": 0.4352114796638489, + "step": 399 + }, + { + "epoch": 0.19770171753367108, + "grad_norm": 0.9123271316620398, + "learning_rate": 1.9872043943516556e-05, + "loss": 0.4076879024505615, + "step": 400 + }, + { + "epoch": 0.19819597182750526, + "grad_norm": 0.9627661884342358, + "learning_rate": 1.987073776603172e-05, + "loss": 0.4406166672706604, + "step": 401 + }, + { + "epoch": 0.19869022612133944, + "grad_norm": 0.8833048421451372, + "learning_rate": 1.9869424998983386e-05, + "loss": 0.3974360227584839, + "step": 402 + }, + { + "epoch": 0.19918448041517361, + "grad_norm": 0.8808806866223299, + "learning_rate": 1.9868105643247934e-05, + "loss": 0.4297831058502197, + "step": 403 + }, + { + "epoch": 0.1996787347090078, + "grad_norm": 0.9793340004481055, + "learning_rate": 1.986677969970616e-05, + "loss": 0.4214811623096466, + "step": 404 + }, + { + "epoch": 0.20017298900284197, + "grad_norm": 0.8979387674277745, + "learning_rate": 1.9865447169243234e-05, + "loss": 0.37227538228034973, + "step": 405 + }, + { + "epoch": 0.20066724329667615, + "grad_norm": 0.9492862396661451, + "learning_rate": 
1.986410805274874e-05, + "loss": 0.4367320239543915, + "step": 406 + }, + { + "epoch": 0.20116149759051033, + "grad_norm": 0.9753990450504955, + "learning_rate": 1.9862762351116646e-05, + "loss": 0.4327583909034729, + "step": 407 + }, + { + "epoch": 0.2016557518843445, + "grad_norm": 0.9742332984468446, + "learning_rate": 1.9861410065245332e-05, + "loss": 0.45309939980506897, + "step": 408 + }, + { + "epoch": 0.20215000617817866, + "grad_norm": 0.9433373475369933, + "learning_rate": 1.986005119603756e-05, + "loss": 0.39196106791496277, + "step": 409 + }, + { + "epoch": 0.20264426047201284, + "grad_norm": 0.9834536288459345, + "learning_rate": 1.985868574440049e-05, + "loss": 0.4037923812866211, + "step": 410 + }, + { + "epoch": 0.20313851476584702, + "grad_norm": 0.9331733674072598, + "learning_rate": 1.9857313711245684e-05, + "loss": 0.41214677691459656, + "step": 411 + }, + { + "epoch": 0.2036327690596812, + "grad_norm": 0.9676344806099859, + "learning_rate": 1.9855935097489087e-05, + "loss": 0.4265231192111969, + "step": 412 + }, + { + "epoch": 0.20412702335351537, + "grad_norm": 0.9398051984820485, + "learning_rate": 1.9854549904051046e-05, + "loss": 0.4245712161064148, + "step": 413 + }, + { + "epoch": 0.20462127764734955, + "grad_norm": 1.0688359248893853, + "learning_rate": 1.985315813185629e-05, + "loss": 0.36296984553337097, + "step": 414 + }, + { + "epoch": 0.20511553194118373, + "grad_norm": 0.8752111789079005, + "learning_rate": 1.985175978183395e-05, + "loss": 0.3982447683811188, + "step": 415 + }, + { + "epoch": 0.2056097862350179, + "grad_norm": 0.9696106773901182, + "learning_rate": 1.9850354854917543e-05, + "loss": 0.4087941646575928, + "step": 416 + }, + { + "epoch": 0.2061040405288521, + "grad_norm": 0.9068111697273192, + "learning_rate": 1.9848943352044982e-05, + "loss": 0.4147699177265167, + "step": 417 + }, + { + "epoch": 0.20659829482268627, + "grad_norm": 0.9679150237458849, + "learning_rate": 1.9847525274158562e-05, + "loss": 
0.42588335275650024, + "step": 418 + }, + { + "epoch": 0.20709254911652045, + "grad_norm": 0.8455247598954041, + "learning_rate": 1.9846100622204975e-05, + "loss": 0.42607247829437256, + "step": 419 + }, + { + "epoch": 0.20758680341035463, + "grad_norm": 0.8383230576354441, + "learning_rate": 1.9844669397135292e-05, + "loss": 0.3600303530693054, + "step": 420 + }, + { + "epoch": 0.2080810577041888, + "grad_norm": 0.9989742736396935, + "learning_rate": 1.9843231599904988e-05, + "loss": 0.47888651490211487, + "step": 421 + }, + { + "epoch": 0.20857531199802298, + "grad_norm": 0.9050077435994102, + "learning_rate": 1.9841787231473906e-05, + "loss": 0.3789903521537781, + "step": 422 + }, + { + "epoch": 0.20906956629185716, + "grad_norm": 0.9737429395044322, + "learning_rate": 1.9840336292806292e-05, + "loss": 0.3682858943939209, + "step": 423 + }, + { + "epoch": 0.20956382058569134, + "grad_norm": 0.9565489819657318, + "learning_rate": 1.9838878784870772e-05, + "loss": 0.42071375250816345, + "step": 424 + }, + { + "epoch": 0.21005807487952552, + "grad_norm": 0.8997646005118014, + "learning_rate": 1.9837414708640353e-05, + "loss": 0.4258945882320404, + "step": 425 + }, + { + "epoch": 0.2105523291733597, + "grad_norm": 0.8773247199262179, + "learning_rate": 1.9835944065092433e-05, + "loss": 0.42377644777297974, + "step": 426 + }, + { + "epoch": 0.21104658346719388, + "grad_norm": 0.8695535067011908, + "learning_rate": 1.9834466855208795e-05, + "loss": 0.35860198736190796, + "step": 427 + }, + { + "epoch": 0.21154083776102806, + "grad_norm": 0.8547283257189083, + "learning_rate": 1.9832983079975606e-05, + "loss": 0.3498537242412567, + "step": 428 + }, + { + "epoch": 0.21203509205486223, + "grad_norm": 0.9645117506541977, + "learning_rate": 1.9831492740383405e-05, + "loss": 0.3779754042625427, + "step": 429 + }, + { + "epoch": 0.2125293463486964, + "grad_norm": 0.9052431386511324, + "learning_rate": 1.9829995837427124e-05, + "loss": 0.3574570119380951, + "step": 430 + }, + 
{ + "epoch": 0.2130236006425306, + "grad_norm": 0.9528105437455127, + "learning_rate": 1.982849237210608e-05, + "loss": 0.40678369998931885, + "step": 431 + }, + { + "epoch": 0.21351785493636477, + "grad_norm": 1.0383565017869998, + "learning_rate": 1.9826982345423955e-05, + "loss": 0.4392494261264801, + "step": 432 + }, + { + "epoch": 0.21401210923019895, + "grad_norm": 0.9595788699726988, + "learning_rate": 1.982546575838883e-05, + "loss": 0.3858703374862671, + "step": 433 + }, + { + "epoch": 0.21450636352403313, + "grad_norm": 1.022569300933342, + "learning_rate": 1.9823942612013153e-05, + "loss": 0.4427873492240906, + "step": 434 + }, + { + "epoch": 0.21500061781786728, + "grad_norm": 1.0243841009335557, + "learning_rate": 1.9822412907313756e-05, + "loss": 0.40610629320144653, + "step": 435 + }, + { + "epoch": 0.21549487211170146, + "grad_norm": 1.0647698522638835, + "learning_rate": 1.9820876645311847e-05, + "loss": 0.4181024432182312, + "step": 436 + }, + { + "epoch": 0.21598912640553564, + "grad_norm": 0.9101041422869367, + "learning_rate": 1.981933382703301e-05, + "loss": 0.39591747522354126, + "step": 437 + }, + { + "epoch": 0.21648338069936982, + "grad_norm": 1.0250837449595331, + "learning_rate": 1.9817784453507215e-05, + "loss": 0.4326947033405304, + "step": 438 + }, + { + "epoch": 0.216977634993204, + "grad_norm": 1.0886150838818542, + "learning_rate": 1.98162285257688e-05, + "loss": 0.42645522952079773, + "step": 439 + }, + { + "epoch": 0.21747188928703817, + "grad_norm": 0.978930417047399, + "learning_rate": 1.9814666044856472e-05, + "loss": 0.37372538447380066, + "step": 440 + }, + { + "epoch": 0.21796614358087235, + "grad_norm": 1.0917263900138416, + "learning_rate": 1.9813097011813328e-05, + "loss": 0.44066423177719116, + "step": 441 + }, + { + "epoch": 0.21846039787470653, + "grad_norm": 0.9730835844652884, + "learning_rate": 1.9811521427686833e-05, + "loss": 0.39892369508743286, + "step": 442 + }, + { + "epoch": 0.2189546521685407, + 
"grad_norm": 1.003964491264553, + "learning_rate": 1.980993929352882e-05, + "loss": 0.43497514724731445, + "step": 443 + }, + { + "epoch": 0.2194489064623749, + "grad_norm": 0.9716014988350979, + "learning_rate": 1.9808350610395504e-05, + "loss": 0.3810148239135742, + "step": 444 + }, + { + "epoch": 0.21994316075620907, + "grad_norm": 1.0156931642150575, + "learning_rate": 1.9806755379347465e-05, + "loss": 0.3952462673187256, + "step": 445 + }, + { + "epoch": 0.22043741505004324, + "grad_norm": 0.8774607433571091, + "learning_rate": 1.9805153601449655e-05, + "loss": 0.39168232679367065, + "step": 446 + }, + { + "epoch": 0.22093166934387742, + "grad_norm": 0.8991272209071992, + "learning_rate": 1.98035452777714e-05, + "loss": 0.38572901487350464, + "step": 447 + }, + { + "epoch": 0.2214259236377116, + "grad_norm": 0.9468757778036829, + "learning_rate": 1.980193040938639e-05, + "loss": 0.40514758229255676, + "step": 448 + }, + { + "epoch": 0.22192017793154578, + "grad_norm": 0.9858758484436677, + "learning_rate": 1.9800308997372696e-05, + "loss": 0.4289678931236267, + "step": 449 + }, + { + "epoch": 0.22241443222537996, + "grad_norm": 1.074259689420517, + "learning_rate": 1.979868104281274e-05, + "loss": 0.4082314670085907, + "step": 450 + }, + { + "epoch": 0.22290868651921414, + "grad_norm": 0.8691392363656588, + "learning_rate": 1.979704654679333e-05, + "loss": 0.3819827735424042, + "step": 451 + }, + { + "epoch": 0.22340294081304832, + "grad_norm": 0.9538480526249539, + "learning_rate": 1.979540551040563e-05, + "loss": 0.42063748836517334, + "step": 452 + }, + { + "epoch": 0.2238971951068825, + "grad_norm": 0.9510560747426838, + "learning_rate": 1.9793757934745166e-05, + "loss": 0.41634586453437805, + "step": 453 + }, + { + "epoch": 0.22439144940071667, + "grad_norm": 0.9597511417746731, + "learning_rate": 1.979210382091184e-05, + "loss": 0.4151400625705719, + "step": 454 + }, + { + "epoch": 0.22488570369455085, + "grad_norm": 0.9461794779595009, + 
"learning_rate": 1.9790443170009918e-05, + "loss": 0.40609729290008545, + "step": 455 + }, + { + "epoch": 0.22537995798838503, + "grad_norm": 0.9000627758052128, + "learning_rate": 1.9788775983148022e-05, + "loss": 0.38967129588127136, + "step": 456 + }, + { + "epoch": 0.2258742122822192, + "grad_norm": 0.9437292574418441, + "learning_rate": 1.978710226143915e-05, + "loss": 0.3833470940589905, + "step": 457 + }, + { + "epoch": 0.2263684665760534, + "grad_norm": 1.0849111028533656, + "learning_rate": 1.978542200600064e-05, + "loss": 0.42918887734413147, + "step": 458 + }, + { + "epoch": 0.22686272086988757, + "grad_norm": 0.8891911900981012, + "learning_rate": 1.978373521795422e-05, + "loss": 0.3793666660785675, + "step": 459 + }, + { + "epoch": 0.22735697516372175, + "grad_norm": 0.9329571379921634, + "learning_rate": 1.978204189842596e-05, + "loss": 0.3885256350040436, + "step": 460 + }, + { + "epoch": 0.22785122945755593, + "grad_norm": 0.9612859575938862, + "learning_rate": 1.97803420485463e-05, + "loss": 0.4003330171108246, + "step": 461 + }, + { + "epoch": 0.22834548375139008, + "grad_norm": 1.0153934251086247, + "learning_rate": 1.9778635669450026e-05, + "loss": 0.4050712585449219, + "step": 462 + }, + { + "epoch": 0.22883973804522426, + "grad_norm": 0.9955917551783842, + "learning_rate": 1.9776922762276304e-05, + "loss": 0.4003967046737671, + "step": 463 + }, + { + "epoch": 0.22933399233905843, + "grad_norm": 1.0625378898456048, + "learning_rate": 1.9775203328168643e-05, + "loss": 0.4506968855857849, + "step": 464 + }, + { + "epoch": 0.2298282466328926, + "grad_norm": 0.9586656507624374, + "learning_rate": 1.9773477368274906e-05, + "loss": 0.3947281241416931, + "step": 465 + }, + { + "epoch": 0.2303225009267268, + "grad_norm": 1.0193199601021392, + "learning_rate": 1.9771744883747326e-05, + "loss": 0.4166758954524994, + "step": 466 + }, + { + "epoch": 0.23081675522056097, + "grad_norm": 0.9824293606770813, + "learning_rate": 1.9770005875742484e-05, + "loss": 
0.40400344133377075, + "step": 467 + }, + { + "epoch": 0.23131100951439515, + "grad_norm": 0.9404029827561814, + "learning_rate": 1.9768260345421312e-05, + "loss": 0.4143296480178833, + "step": 468 + }, + { + "epoch": 0.23180526380822933, + "grad_norm": 1.0496759638208417, + "learning_rate": 1.976650829394911e-05, + "loss": 0.39128193259239197, + "step": 469 + }, + { + "epoch": 0.2322995181020635, + "grad_norm": 1.033325283396431, + "learning_rate": 1.9764749722495514e-05, + "loss": 0.4305758476257324, + "step": 470 + }, + { + "epoch": 0.23279377239589769, + "grad_norm": 0.9791981730439014, + "learning_rate": 1.9762984632234523e-05, + "loss": 0.41711747646331787, + "step": 471 + }, + { + "epoch": 0.23328802668973186, + "grad_norm": 0.9590482451910926, + "learning_rate": 1.976121302434449e-05, + "loss": 0.43328845500946045, + "step": 472 + }, + { + "epoch": 0.23378228098356604, + "grad_norm": 0.9134750069589276, + "learning_rate": 1.975943490000811e-05, + "loss": 0.38707420229911804, + "step": 473 + }, + { + "epoch": 0.23427653527740022, + "grad_norm": 0.9896782154106246, + "learning_rate": 1.9757650260412438e-05, + "loss": 0.390054851770401, + "step": 474 + }, + { + "epoch": 0.2347707895712344, + "grad_norm": 1.0430972668852745, + "learning_rate": 1.9755859106748875e-05, + "loss": 0.45697346329689026, + "step": 475 + }, + { + "epoch": 0.23526504386506858, + "grad_norm": 0.950214634248398, + "learning_rate": 1.9754061440213165e-05, + "loss": 0.4381307363510132, + "step": 476 + }, + { + "epoch": 0.23575929815890276, + "grad_norm": 0.9612066818802636, + "learning_rate": 1.9752257262005403e-05, + "loss": 0.4217841625213623, + "step": 477 + }, + { + "epoch": 0.23625355245273694, + "grad_norm": 0.8699003234814695, + "learning_rate": 1.9750446573330038e-05, + "loss": 0.35968005657196045, + "step": 478 + }, + { + "epoch": 0.23674780674657112, + "grad_norm": 0.8353290173002438, + "learning_rate": 1.9748629375395856e-05, + "loss": 0.3516439199447632, + "step": 479 + }, + { + 
"epoch": 0.2372420610404053, + "grad_norm": 0.9683111499165196, + "learning_rate": 1.9746805669415995e-05, + "loss": 0.4078671634197235, + "step": 480 + }, + { + "epoch": 0.23773631533423947, + "grad_norm": 0.967434671965903, + "learning_rate": 1.9744975456607936e-05, + "loss": 0.39654213190078735, + "step": 481 + }, + { + "epoch": 0.23823056962807365, + "grad_norm": 0.9446129798331165, + "learning_rate": 1.9743138738193498e-05, + "loss": 0.41271698474884033, + "step": 482 + }, + { + "epoch": 0.23872482392190783, + "grad_norm": 0.9563785743614732, + "learning_rate": 1.974129551539885e-05, + "loss": 0.3957251310348511, + "step": 483 + }, + { + "epoch": 0.239219078215742, + "grad_norm": 1.0318067283466978, + "learning_rate": 1.9739445789454506e-05, + "loss": 0.39857393503189087, + "step": 484 + }, + { + "epoch": 0.2397133325095762, + "grad_norm": 0.9625937520590958, + "learning_rate": 1.973758956159531e-05, + "loss": 0.4263526499271393, + "step": 485 + }, + { + "epoch": 0.24020758680341037, + "grad_norm": 0.9782583924092142, + "learning_rate": 1.9735726833060457e-05, + "loss": 0.3849489688873291, + "step": 486 + }, + { + "epoch": 0.24070184109724455, + "grad_norm": 0.9932149128826128, + "learning_rate": 1.9733857605093476e-05, + "loss": 0.431019127368927, + "step": 487 + }, + { + "epoch": 0.2411960953910787, + "grad_norm": 0.9703866882534654, + "learning_rate": 1.973198187894224e-05, + "loss": 0.3740619421005249, + "step": 488 + }, + { + "epoch": 0.24169034968491288, + "grad_norm": 0.9420951155788563, + "learning_rate": 1.9730099655858953e-05, + "loss": 0.361680269241333, + "step": 489 + }, + { + "epoch": 0.24218460397874705, + "grad_norm": 1.0045147685747362, + "learning_rate": 1.9728210937100162e-05, + "loss": 0.41683071851730347, + "step": 490 + }, + { + "epoch": 0.24267885827258123, + "grad_norm": 1.0255058564946795, + "learning_rate": 1.9726315723926746e-05, + "loss": 0.3898739516735077, + "step": 491 + }, + { + "epoch": 0.2431731125664154, + "grad_norm": 
0.992746780987763, + "learning_rate": 1.9724414017603925e-05, + "loss": 0.39339032769203186, + "step": 492 + }, + { + "epoch": 0.2436673668602496, + "grad_norm": 0.9018262406248393, + "learning_rate": 1.9722505819401255e-05, + "loss": 0.401676744222641, + "step": 493 + }, + { + "epoch": 0.24416162115408377, + "grad_norm": 0.956392375337736, + "learning_rate": 1.9720591130592613e-05, + "loss": 0.3814789056777954, + "step": 494 + }, + { + "epoch": 0.24465587544791795, + "grad_norm": 1.0339059816881517, + "learning_rate": 1.9718669952456226e-05, + "loss": 0.3980346918106079, + "step": 495 + }, + { + "epoch": 0.24515012974175213, + "grad_norm": 1.0852693818985448, + "learning_rate": 1.971674228627464e-05, + "loss": 0.4222795069217682, + "step": 496 + }, + { + "epoch": 0.2456443840355863, + "grad_norm": 0.9629746856387489, + "learning_rate": 1.971480813333474e-05, + "loss": 0.3795197904109955, + "step": 497 + }, + { + "epoch": 0.24613863832942048, + "grad_norm": 1.0428831707745134, + "learning_rate": 1.971286749492774e-05, + "loss": 0.3746161460876465, + "step": 498 + }, + { + "epoch": 0.24663289262325466, + "grad_norm": 1.0211942338953277, + "learning_rate": 1.9710920372349174e-05, + "loss": 0.3552350699901581, + "step": 499 + }, + { + "epoch": 0.24712714691708884, + "grad_norm": 0.913724645727759, + "learning_rate": 1.9708966766898925e-05, + "loss": 0.39690741896629333, + "step": 500 + }, + { + "epoch": 0.24762140121092302, + "grad_norm": 1.0179277636972188, + "learning_rate": 1.9707006679881186e-05, + "loss": 0.39530014991760254, + "step": 501 + }, + { + "epoch": 0.2481156555047572, + "grad_norm": 1.0722850381631455, + "learning_rate": 1.9705040112604483e-05, + "loss": 0.41228705644607544, + "step": 502 + }, + { + "epoch": 0.24860990979859138, + "grad_norm": 0.9774177098582278, + "learning_rate": 1.9703067066381668e-05, + "loss": 0.4330476224422455, + "step": 503 + }, + { + "epoch": 0.24910416409242556, + "grad_norm": 0.9849824106564479, + "learning_rate": 
1.970108754252992e-05, + "loss": 0.38365668058395386, + "step": 504 + }, + { + "epoch": 0.24959841838625973, + "grad_norm": 1.0789440281177851, + "learning_rate": 1.969910154237074e-05, + "loss": 0.4419581890106201, + "step": 505 + }, + { + "epoch": 0.2500926726800939, + "grad_norm": 1.0828116066497757, + "learning_rate": 1.9697109067229957e-05, + "loss": 0.38741230964660645, + "step": 506 + }, + { + "epoch": 0.2505869269739281, + "grad_norm": 0.9914523280251673, + "learning_rate": 1.969511011843771e-05, + "loss": 0.41751983761787415, + "step": 507 + }, + { + "epoch": 0.25108118126776224, + "grad_norm": 0.9718169799013945, + "learning_rate": 1.9693104697328477e-05, + "loss": 0.40355241298675537, + "step": 508 + }, + { + "epoch": 0.25157543556159645, + "grad_norm": 1.003225231520968, + "learning_rate": 1.9691092805241046e-05, + "loss": 0.3511045575141907, + "step": 509 + }, + { + "epoch": 0.2520696898554306, + "grad_norm": 1.1208960250871327, + "learning_rate": 1.9689074443518526e-05, + "loss": 0.38917112350463867, + "step": 510 + }, + { + "epoch": 0.2525639441492648, + "grad_norm": 0.9640213098912707, + "learning_rate": 1.968704961350835e-05, + "loss": 0.40256473422050476, + "step": 511 + }, + { + "epoch": 0.25305819844309896, + "grad_norm": 0.8857886708710384, + "learning_rate": 1.968501831656226e-05, + "loss": 0.32350897789001465, + "step": 512 + }, + { + "epoch": 0.25355245273693316, + "grad_norm": 1.0209548318094466, + "learning_rate": 1.9682980554036322e-05, + "loss": 0.36787012219429016, + "step": 513 + }, + { + "epoch": 0.2540467070307673, + "grad_norm": 1.063374274844625, + "learning_rate": 1.9680936327290924e-05, + "loss": 0.4035605490207672, + "step": 514 + }, + { + "epoch": 0.2545409613246015, + "grad_norm": 0.9437423188361623, + "learning_rate": 1.9678885637690755e-05, + "loss": 0.39402660727500916, + "step": 515 + }, + { + "epoch": 0.2550352156184357, + "grad_norm": 1.1793476229973228, + "learning_rate": 1.967682848660483e-05, + "loss": 
0.37553271651268005, + "step": 516 + }, + { + "epoch": 0.2555294699122699, + "grad_norm": 1.047789732428987, + "learning_rate": 1.9674764875406472e-05, + "loss": 0.40148675441741943, + "step": 517 + }, + { + "epoch": 0.25602372420610403, + "grad_norm": 1.1994265366678782, + "learning_rate": 1.967269480547332e-05, + "loss": 0.45255252718925476, + "step": 518 + }, + { + "epoch": 0.25651797849993824, + "grad_norm": 1.0116666478277523, + "learning_rate": 1.9670618278187318e-05, + "loss": 0.4183574616909027, + "step": 519 + }, + { + "epoch": 0.2570122327937724, + "grad_norm": 0.9518606397664687, + "learning_rate": 1.9668535294934733e-05, + "loss": 0.3950796127319336, + "step": 520 + }, + { + "epoch": 0.2575064870876066, + "grad_norm": 0.9729673190351172, + "learning_rate": 1.9666445857106132e-05, + "loss": 0.4062424898147583, + "step": 521 + }, + { + "epoch": 0.25800074138144075, + "grad_norm": 0.9474577180562711, + "learning_rate": 1.966434996609639e-05, + "loss": 0.4095906913280487, + "step": 522 + }, + { + "epoch": 0.25849499567527495, + "grad_norm": 1.1739974412660419, + "learning_rate": 1.96622476233047e-05, + "loss": 0.42302393913269043, + "step": 523 + }, + { + "epoch": 0.2589892499691091, + "grad_norm": 1.0746371790844444, + "learning_rate": 1.966013883013455e-05, + "loss": 0.43204039335250854, + "step": 524 + }, + { + "epoch": 0.2594835042629433, + "grad_norm": 0.9744852361980706, + "learning_rate": 1.9658023587993748e-05, + "loss": 0.39941906929016113, + "step": 525 + }, + { + "epoch": 0.25997775855677746, + "grad_norm": 0.9322675006976836, + "learning_rate": 1.9655901898294397e-05, + "loss": 0.37053728103637695, + "step": 526 + }, + { + "epoch": 0.26047201285061167, + "grad_norm": 0.9500036404091089, + "learning_rate": 1.96537737624529e-05, + "loss": 0.4126317501068115, + "step": 527 + }, + { + "epoch": 0.2609662671444458, + "grad_norm": 0.9592560956850021, + "learning_rate": 1.9651639181889975e-05, + "loss": 0.42397794127464294, + "step": 528 + }, + { + 
"epoch": 0.26146052143827997, + "grad_norm": 1.09730750123291, + "learning_rate": 1.964949815803064e-05, + "loss": 0.3606872260570526, + "step": 529 + }, + { + "epoch": 0.2619547757321142, + "grad_norm": 1.0256203362936218, + "learning_rate": 1.9647350692304206e-05, + "loss": 0.420923113822937, + "step": 530 + }, + { + "epoch": 0.2624490300259483, + "grad_norm": 1.0242401280009386, + "learning_rate": 1.9645196786144298e-05, + "loss": 0.41700440645217896, + "step": 531 + }, + { + "epoch": 0.26294328431978253, + "grad_norm": 0.9861507549209962, + "learning_rate": 1.9643036440988825e-05, + "loss": 0.3961814045906067, + "step": 532 + }, + { + "epoch": 0.2634375386136167, + "grad_norm": 0.9400998714081333, + "learning_rate": 1.9640869658280005e-05, + "loss": 0.4025250971317291, + "step": 533 + }, + { + "epoch": 0.2639317929074509, + "grad_norm": 1.0201682019086518, + "learning_rate": 1.9638696439464357e-05, + "loss": 0.38828611373901367, + "step": 534 + }, + { + "epoch": 0.26442604720128504, + "grad_norm": 0.8944214314341241, + "learning_rate": 1.963651678599268e-05, + "loss": 0.3109109401702881, + "step": 535 + }, + { + "epoch": 0.26492030149511925, + "grad_norm": 1.0758326810562073, + "learning_rate": 1.963433069932009e-05, + "loss": 0.41516438126564026, + "step": 536 + }, + { + "epoch": 0.2654145557889534, + "grad_norm": 0.972035022615468, + "learning_rate": 1.9632138180905982e-05, + "loss": 0.3765295743942261, + "step": 537 + }, + { + "epoch": 0.2659088100827876, + "grad_norm": 1.0590611315407708, + "learning_rate": 1.9629939232214052e-05, + "loss": 0.37631309032440186, + "step": 538 + }, + { + "epoch": 0.26640306437662176, + "grad_norm": 0.9543257606304313, + "learning_rate": 1.9627733854712286e-05, + "loss": 0.3640018403530121, + "step": 539 + }, + { + "epoch": 0.26689731867045596, + "grad_norm": 1.0213174253270256, + "learning_rate": 1.9625522049872962e-05, + "loss": 0.3971521854400635, + "step": 540 + }, + { + "epoch": 0.2673915729642901, + "grad_norm": 
1.0059131210770185, + "learning_rate": 1.962330381917265e-05, + "loss": 0.4218612313270569, + "step": 541 + }, + { + "epoch": 0.2678858272581243, + "grad_norm": 1.0124871124462342, + "learning_rate": 1.9621079164092203e-05, + "loss": 0.38814622163772583, + "step": 542 + }, + { + "epoch": 0.26838008155195847, + "grad_norm": 1.0310689772428585, + "learning_rate": 1.961884808611678e-05, + "loss": 0.3912709355354309, + "step": 543 + }, + { + "epoch": 0.2688743358457927, + "grad_norm": 0.9919097213748044, + "learning_rate": 1.9616610586735808e-05, + "loss": 0.4007106423377991, + "step": 544 + }, + { + "epoch": 0.26936859013962683, + "grad_norm": 0.9871985402956727, + "learning_rate": 1.9614366667443016e-05, + "loss": 0.37406057119369507, + "step": 545 + }, + { + "epoch": 0.26986284443346104, + "grad_norm": 0.970768236440829, + "learning_rate": 1.961211632973641e-05, + "loss": 0.4187811613082886, + "step": 546 + }, + { + "epoch": 0.2703570987272952, + "grad_norm": 1.049304525520643, + "learning_rate": 1.960985957511828e-05, + "loss": 0.44418057799339294, + "step": 547 + }, + { + "epoch": 0.2708513530211294, + "grad_norm": 1.0048719478421346, + "learning_rate": 1.9607596405095205e-05, + "loss": 0.41016438603401184, + "step": 548 + }, + { + "epoch": 0.27134560731496354, + "grad_norm": 1.2563417457062223, + "learning_rate": 1.9605326821178047e-05, + "loss": 0.39461439847946167, + "step": 549 + }, + { + "epoch": 0.27183986160879775, + "grad_norm": 0.9443238609304102, + "learning_rate": 1.960305082488195e-05, + "loss": 0.4159786105155945, + "step": 550 + }, + { + "epoch": 0.2723341159026319, + "grad_norm": 0.9387957037755528, + "learning_rate": 1.960076841772633e-05, + "loss": 0.3702941834926605, + "step": 551 + }, + { + "epoch": 0.2728283701964661, + "grad_norm": 1.0745575617770338, + "learning_rate": 1.9598479601234894e-05, + "loss": 0.3482900559902191, + "step": 552 + }, + { + "epoch": 0.27332262449030026, + "grad_norm": 1.1412061517783256, + "learning_rate": 
1.9596184376935618e-05, + "loss": 0.40550655126571655, + "step": 553 + }, + { + "epoch": 0.2738168787841344, + "grad_norm": 0.9446073244587436, + "learning_rate": 1.9593882746360767e-05, + "loss": 0.38604867458343506, + "step": 554 + }, + { + "epoch": 0.2743111330779686, + "grad_norm": 0.9388567147005249, + "learning_rate": 1.9591574711046876e-05, + "loss": 0.36586758494377136, + "step": 555 + }, + { + "epoch": 0.27480538737180277, + "grad_norm": 0.9730414125092071, + "learning_rate": 1.958926027253475e-05, + "loss": 0.37780559062957764, + "step": 556 + }, + { + "epoch": 0.275299641665637, + "grad_norm": 0.9401659835761762, + "learning_rate": 1.9586939432369486e-05, + "loss": 0.3837544322013855, + "step": 557 + }, + { + "epoch": 0.2757938959594711, + "grad_norm": 1.038905164013387, + "learning_rate": 1.9584612192100433e-05, + "loss": 0.39425861835479736, + "step": 558 + }, + { + "epoch": 0.27628815025330533, + "grad_norm": 1.0791545750316935, + "learning_rate": 1.958227855328123e-05, + "loss": 0.4008832275867462, + "step": 559 + }, + { + "epoch": 0.2767824045471395, + "grad_norm": 1.0509839705522974, + "learning_rate": 1.957993851746978e-05, + "loss": 0.42411595582962036, + "step": 560 + }, + { + "epoch": 0.2772766588409737, + "grad_norm": 1.1626138880546706, + "learning_rate": 1.9577592086228257e-05, + "loss": 0.4028055965900421, + "step": 561 + }, + { + "epoch": 0.27777091313480784, + "grad_norm": 0.9383996498843509, + "learning_rate": 1.9575239261123102e-05, + "loss": 0.3785157799720764, + "step": 562 + }, + { + "epoch": 0.27826516742864205, + "grad_norm": 0.9289370196839293, + "learning_rate": 1.9572880043725032e-05, + "loss": 0.3726264536380768, + "step": 563 + }, + { + "epoch": 0.2787594217224762, + "grad_norm": 0.9959287145902769, + "learning_rate": 1.957051443560902e-05, + "loss": 0.37261486053466797, + "step": 564 + }, + { + "epoch": 0.2792536760163104, + "grad_norm": 0.9394373844868922, + "learning_rate": 1.956814243835432e-05, + "loss": 
0.34781068563461304, + "step": 565 + }, + { + "epoch": 0.27974793031014455, + "grad_norm": 0.9899407389551799, + "learning_rate": 1.956576405354444e-05, + "loss": 0.3828197121620178, + "step": 566 + }, + { + "epoch": 0.28024218460397876, + "grad_norm": 0.9387592741594649, + "learning_rate": 1.9563379282767156e-05, + "loss": 0.3839726150035858, + "step": 567 + }, + { + "epoch": 0.2807364388978129, + "grad_norm": 1.053498529947078, + "learning_rate": 1.9560988127614507e-05, + "loss": 0.3658025562763214, + "step": 568 + }, + { + "epoch": 0.2812306931916471, + "grad_norm": 1.064206434015044, + "learning_rate": 1.9558590589682795e-05, + "loss": 0.400045782327652, + "step": 569 + }, + { + "epoch": 0.28172494748548127, + "grad_norm": 0.9470530474737298, + "learning_rate": 1.955618667057258e-05, + "loss": 0.36586880683898926, + "step": 570 + }, + { + "epoch": 0.2822192017793155, + "grad_norm": 1.0137760854012388, + "learning_rate": 1.9553776371888684e-05, + "loss": 0.3886389136314392, + "step": 571 + }, + { + "epoch": 0.2827134560731496, + "grad_norm": 1.0159520278130145, + "learning_rate": 1.955135969524019e-05, + "loss": 0.37858110666275024, + "step": 572 + }, + { + "epoch": 0.28320771036698383, + "grad_norm": 0.939134880585939, + "learning_rate": 1.9548936642240435e-05, + "loss": 0.3264877498149872, + "step": 573 + }, + { + "epoch": 0.283701964660818, + "grad_norm": 1.1465399296789363, + "learning_rate": 1.9546507214507017e-05, + "loss": 0.3756924569606781, + "step": 574 + }, + { + "epoch": 0.2841962189546522, + "grad_norm": 1.0922050133590595, + "learning_rate": 1.9544071413661783e-05, + "loss": 0.3773806691169739, + "step": 575 + }, + { + "epoch": 0.28469047324848634, + "grad_norm": 1.0432958526312845, + "learning_rate": 1.9541629241330842e-05, + "loss": 0.37437382340431213, + "step": 576 + }, + { + "epoch": 0.28518472754232055, + "grad_norm": 0.9730241652440514, + "learning_rate": 1.9539180699144552e-05, + "loss": 0.3835929036140442, + "step": 577 + }, + { + "epoch": 
0.2856789818361547, + "grad_norm": 1.2039096391780213, + "learning_rate": 1.9536725788737528e-05, + "loss": 0.39163681864738464, + "step": 578 + }, + { + "epoch": 0.2861732361299889, + "grad_norm": 1.1007303408462066, + "learning_rate": 1.953426451174863e-05, + "loss": 0.39241698384284973, + "step": 579 + }, + { + "epoch": 0.28666749042382306, + "grad_norm": 0.9748115984741068, + "learning_rate": 1.953179686982097e-05, + "loss": 0.32731348276138306, + "step": 580 + }, + { + "epoch": 0.2871617447176572, + "grad_norm": 0.9649406632940735, + "learning_rate": 1.9529322864601915e-05, + "loss": 0.34735041856765747, + "step": 581 + }, + { + "epoch": 0.2876559990114914, + "grad_norm": 1.0831552948058796, + "learning_rate": 1.952684249774307e-05, + "loss": 0.3795308470726013, + "step": 582 + }, + { + "epoch": 0.28815025330532557, + "grad_norm": 1.0599543241474398, + "learning_rate": 1.95243557709003e-05, + "loss": 0.3546086549758911, + "step": 583 + }, + { + "epoch": 0.28864450759915977, + "grad_norm": 0.9634030800835625, + "learning_rate": 1.9521862685733703e-05, + "loss": 0.35397839546203613, + "step": 584 + }, + { + "epoch": 0.2891387618929939, + "grad_norm": 0.972134968680729, + "learning_rate": 1.9519363243907627e-05, + "loss": 0.350521981716156, + "step": 585 + }, + { + "epoch": 0.28963301618682813, + "grad_norm": 1.0201322204570258, + "learning_rate": 1.9516857447090663e-05, + "loss": 0.380625456571579, + "step": 586 + }, + { + "epoch": 0.2901272704806623, + "grad_norm": 0.9847688200101109, + "learning_rate": 1.9514345296955647e-05, + "loss": 0.40378236770629883, + "step": 587 + }, + { + "epoch": 0.2906215247744965, + "grad_norm": 1.0122113576142937, + "learning_rate": 1.9511826795179653e-05, + "loss": 0.4050450325012207, + "step": 588 + }, + { + "epoch": 0.29111577906833064, + "grad_norm": 1.048628562831542, + "learning_rate": 1.9509301943444e-05, + "loss": 0.3772329092025757, + "step": 589 + }, + { + "epoch": 0.29161003336216484, + "grad_norm": 1.0803687765146506, 
+ "learning_rate": 1.9506770743434244e-05, + "loss": 0.4079870581626892, + "step": 590 + }, + { + "epoch": 0.292104287655999, + "grad_norm": 1.0069688403525805, + "learning_rate": 1.950423319684017e-05, + "loss": 0.4233503043651581, + "step": 591 + }, + { + "epoch": 0.2925985419498332, + "grad_norm": 1.0403594154189246, + "learning_rate": 1.9501689305355814e-05, + "loss": 0.395530104637146, + "step": 592 + }, + { + "epoch": 0.29309279624366735, + "grad_norm": 1.0468686113369423, + "learning_rate": 1.949913907067944e-05, + "loss": 0.4266175925731659, + "step": 593 + }, + { + "epoch": 0.29358705053750156, + "grad_norm": 1.0371386643985676, + "learning_rate": 1.949658249451355e-05, + "loss": 0.4428660571575165, + "step": 594 + }, + { + "epoch": 0.2940813048313357, + "grad_norm": 0.928511699803538, + "learning_rate": 1.9494019578564874e-05, + "loss": 0.36831945180892944, + "step": 595 + }, + { + "epoch": 0.2945755591251699, + "grad_norm": 1.059362576098806, + "learning_rate": 1.949145032454438e-05, + "loss": 0.392259806394577, + "step": 596 + }, + { + "epoch": 0.29506981341900407, + "grad_norm": 0.9638882642169329, + "learning_rate": 1.948887473416727e-05, + "loss": 0.43743032217025757, + "step": 597 + }, + { + "epoch": 0.2955640677128383, + "grad_norm": 0.9566828851720006, + "learning_rate": 1.9486292809152965e-05, + "loss": 0.3725258409976959, + "step": 598 + }, + { + "epoch": 0.2960583220066724, + "grad_norm": 0.9479087116485218, + "learning_rate": 1.948370455122512e-05, + "loss": 0.39507436752319336, + "step": 599 + }, + { + "epoch": 0.29655257630050663, + "grad_norm": 1.0509892705512045, + "learning_rate": 1.9481109962111623e-05, + "loss": 0.40915870666503906, + "step": 600 + }, + { + "epoch": 0.2970468305943408, + "grad_norm": 0.9528830591600533, + "learning_rate": 1.947850904354459e-05, + "loss": 0.3465006351470947, + "step": 601 + }, + { + "epoch": 0.297541084888175, + "grad_norm": 1.0989483899383072, + "learning_rate": 1.9475901797260346e-05, + "loss": 
0.4205567538738251, + "step": 602 + }, + { + "epoch": 0.29803533918200914, + "grad_norm": 0.9986003576186586, + "learning_rate": 1.9473288224999455e-05, + "loss": 0.37682560086250305, + "step": 603 + }, + { + "epoch": 0.29852959347584335, + "grad_norm": 1.0393693996744362, + "learning_rate": 1.9470668328506705e-05, + "loss": 0.3865458369255066, + "step": 604 + }, + { + "epoch": 0.2990238477696775, + "grad_norm": 0.9426218637426483, + "learning_rate": 1.9468042109531096e-05, + "loss": 0.36366063356399536, + "step": 605 + }, + { + "epoch": 0.2995181020635117, + "grad_norm": 0.9801320950707162, + "learning_rate": 1.9465409569825857e-05, + "loss": 0.3861471116542816, + "step": 606 + }, + { + "epoch": 0.30001235635734586, + "grad_norm": 1.0257103381374684, + "learning_rate": 1.9462770711148433e-05, + "loss": 0.3499199151992798, + "step": 607 + }, + { + "epoch": 0.30050661065118, + "grad_norm": 1.1030346241860873, + "learning_rate": 1.946012553526049e-05, + "loss": 0.3704417943954468, + "step": 608 + }, + { + "epoch": 0.3010008649450142, + "grad_norm": 1.0751948386377395, + "learning_rate": 1.9457474043927908e-05, + "loss": 0.41278937458992004, + "step": 609 + }, + { + "epoch": 0.30149511923884836, + "grad_norm": 1.0379271128545955, + "learning_rate": 1.9454816238920787e-05, + "loss": 0.36078256368637085, + "step": 610 + }, + { + "epoch": 0.30198937353268257, + "grad_norm": 1.05890389444684, + "learning_rate": 1.9452152122013434e-05, + "loss": 0.3713051676750183, + "step": 611 + }, + { + "epoch": 0.3024836278265167, + "grad_norm": 1.0547983951495754, + "learning_rate": 1.9449481694984382e-05, + "loss": 0.3919684886932373, + "step": 612 + }, + { + "epoch": 0.3029778821203509, + "grad_norm": 1.1211767888578545, + "learning_rate": 1.9446804959616364e-05, + "loss": 0.4249044358730316, + "step": 613 + }, + { + "epoch": 0.3034721364141851, + "grad_norm": 1.0386798112962086, + "learning_rate": 1.9444121917696335e-05, + "loss": 0.4033172130584717, + "step": 614 + }, + { + 
"epoch": 0.3039663907080193, + "grad_norm": 1.020453301484689, + "learning_rate": 1.9441432571015455e-05, + "loss": 0.35740789771080017, + "step": 615 + }, + { + "epoch": 0.30446064500185344, + "grad_norm": 1.0567402195641693, + "learning_rate": 1.9438736921369093e-05, + "loss": 0.41219189763069153, + "step": 616 + }, + { + "epoch": 0.30495489929568764, + "grad_norm": 1.0356137182677312, + "learning_rate": 1.9436034970556824e-05, + "loss": 0.3751283884048462, + "step": 617 + }, + { + "epoch": 0.3054491535895218, + "grad_norm": 1.0460808776118622, + "learning_rate": 1.9433326720382433e-05, + "loss": 0.40294593572616577, + "step": 618 + }, + { + "epoch": 0.305943407883356, + "grad_norm": 1.0087358245362568, + "learning_rate": 1.943061217265391e-05, + "loss": 0.4163772463798523, + "step": 619 + }, + { + "epoch": 0.30643766217719015, + "grad_norm": 1.007467123707354, + "learning_rate": 1.9427891329183444e-05, + "loss": 0.3796529769897461, + "step": 620 + }, + { + "epoch": 0.30693191647102436, + "grad_norm": 1.0905533067383615, + "learning_rate": 1.942516419178744e-05, + "loss": 0.44097092747688293, + "step": 621 + }, + { + "epoch": 0.3074261707648585, + "grad_norm": 0.9615172689674734, + "learning_rate": 1.942243076228649e-05, + "loss": 0.384232759475708, + "step": 622 + }, + { + "epoch": 0.3079204250586927, + "grad_norm": 0.9038435200954008, + "learning_rate": 1.941969104250539e-05, + "loss": 0.3734084367752075, + "step": 623 + }, + { + "epoch": 0.30841467935252687, + "grad_norm": 0.9414597847653995, + "learning_rate": 1.9416945034273142e-05, + "loss": 0.3532239496707916, + "step": 624 + }, + { + "epoch": 0.3089089336463611, + "grad_norm": 1.0668895366566058, + "learning_rate": 1.941419273942294e-05, + "loss": 0.39430537819862366, + "step": 625 + }, + { + "epoch": 0.3094031879401952, + "grad_norm": 1.0091341034087684, + "learning_rate": 1.941143415979218e-05, + "loss": 0.35790857672691345, + "step": 626 + }, + { + "epoch": 0.30989744223402943, + "grad_norm": 
1.0381854826035726, + "learning_rate": 1.9408669297222446e-05, + "loss": 0.3684060871601105, + "step": 627 + }, + { + "epoch": 0.3103916965278636, + "grad_norm": 0.9553898295016832, + "learning_rate": 1.9405898153559522e-05, + "loss": 0.3425355553627014, + "step": 628 + }, + { + "epoch": 0.3108859508216978, + "grad_norm": 0.9032294986887355, + "learning_rate": 1.9403120730653387e-05, + "loss": 0.3295109272003174, + "step": 629 + }, + { + "epoch": 0.31138020511553194, + "grad_norm": 1.0576168899253493, + "learning_rate": 1.940033703035821e-05, + "loss": 0.37015989422798157, + "step": 630 + }, + { + "epoch": 0.31187445940936614, + "grad_norm": 1.1361288169710941, + "learning_rate": 1.939754705453234e-05, + "loss": 0.40625980496406555, + "step": 631 + }, + { + "epoch": 0.3123687137032003, + "grad_norm": 1.3354529260238757, + "learning_rate": 1.939475080503833e-05, + "loss": 0.42503830790519714, + "step": 632 + }, + { + "epoch": 0.31286296799703445, + "grad_norm": 1.0863606838535078, + "learning_rate": 1.939194828374292e-05, + "loss": 0.36230289936065674, + "step": 633 + }, + { + "epoch": 0.31335722229086865, + "grad_norm": 0.9800314584790245, + "learning_rate": 1.938913949251703e-05, + "loss": 0.4128720164299011, + "step": 634 + }, + { + "epoch": 0.3138514765847028, + "grad_norm": 1.1018828002960295, + "learning_rate": 1.938632443323577e-05, + "loss": 0.39706575870513916, + "step": 635 + }, + { + "epoch": 0.314345730878537, + "grad_norm": 1.0451325322820368, + "learning_rate": 1.9383503107778434e-05, + "loss": 0.38395214080810547, + "step": 636 + }, + { + "epoch": 0.31483998517237116, + "grad_norm": 0.9669746428685202, + "learning_rate": 1.9380675518028495e-05, + "loss": 0.3629944324493408, + "step": 637 + }, + { + "epoch": 0.31533423946620537, + "grad_norm": 1.0589959103814197, + "learning_rate": 1.937784166587361e-05, + "loss": 0.39474761486053467, + "step": 638 + }, + { + "epoch": 0.3158284937600395, + "grad_norm": 1.085403264447479, + "learning_rate": 
1.9375001553205627e-05, + "loss": 0.423098087310791, + "step": 639 + }, + { + "epoch": 0.3163227480538737, + "grad_norm": 0.9239589256190138, + "learning_rate": 1.937215518192056e-05, + "loss": 0.3453904986381531, + "step": 640 + }, + { + "epoch": 0.3168170023477079, + "grad_norm": 0.9432054956835023, + "learning_rate": 1.9369302553918605e-05, + "loss": 0.3659127354621887, + "step": 641 + }, + { + "epoch": 0.3173112566415421, + "grad_norm": 1.060860081964917, + "learning_rate": 1.9366443671104132e-05, + "loss": 0.3613426089286804, + "step": 642 + }, + { + "epoch": 0.31780551093537623, + "grad_norm": 0.9515218135636598, + "learning_rate": 1.93635785353857e-05, + "loss": 0.3556531071662903, + "step": 643 + }, + { + "epoch": 0.31829976522921044, + "grad_norm": 0.9893630091198329, + "learning_rate": 1.9360707148676022e-05, + "loss": 0.3515596091747284, + "step": 644 + }, + { + "epoch": 0.3187940195230446, + "grad_norm": 0.9802147109168395, + "learning_rate": 1.9357829512892e-05, + "loss": 0.36270469427108765, + "step": 645 + }, + { + "epoch": 0.3192882738168788, + "grad_norm": 0.9936651325349853, + "learning_rate": 1.9354945629954706e-05, + "loss": 0.3617076277732849, + "step": 646 + }, + { + "epoch": 0.31978252811071295, + "grad_norm": 1.0835943099678094, + "learning_rate": 1.9352055501789376e-05, + "loss": 0.3888331949710846, + "step": 647 + }, + { + "epoch": 0.32027678240454716, + "grad_norm": 1.0454884563674065, + "learning_rate": 1.9349159130325413e-05, + "loss": 0.41199982166290283, + "step": 648 + }, + { + "epoch": 0.3207710366983813, + "grad_norm": 1.0758693507529822, + "learning_rate": 1.93462565174964e-05, + "loss": 0.3878370225429535, + "step": 649 + }, + { + "epoch": 0.3212652909922155, + "grad_norm": 1.0303850194409756, + "learning_rate": 1.9343347665240077e-05, + "loss": 0.380184531211853, + "step": 650 + }, + { + "epoch": 0.32175954528604966, + "grad_norm": 1.143999159363527, + "learning_rate": 1.9340432575498355e-05, + "loss": 0.3746795356273651, + 
"step": 651 + }, + { + "epoch": 0.32225379957988387, + "grad_norm": 1.0188863097829193, + "learning_rate": 1.93375112502173e-05, + "loss": 0.3700905442237854, + "step": 652 + }, + { + "epoch": 0.322748053873718, + "grad_norm": 0.9032826115280742, + "learning_rate": 1.9334583691347153e-05, + "loss": 0.3331850469112396, + "step": 653 + }, + { + "epoch": 0.32324230816755223, + "grad_norm": 0.949854268007892, + "learning_rate": 1.933164990084231e-05, + "loss": 0.3397464156150818, + "step": 654 + }, + { + "epoch": 0.3237365624613864, + "grad_norm": 1.1199806793436613, + "learning_rate": 1.9328709880661326e-05, + "loss": 0.3837242126464844, + "step": 655 + }, + { + "epoch": 0.3242308167552206, + "grad_norm": 1.0801449332087112, + "learning_rate": 1.9325763632766916e-05, + "loss": 0.38854193687438965, + "step": 656 + }, + { + "epoch": 0.32472507104905474, + "grad_norm": 1.1330798719469783, + "learning_rate": 1.9322811159125955e-05, + "loss": 0.41792556643486023, + "step": 657 + }, + { + "epoch": 0.32521932534288894, + "grad_norm": 0.9831880252943476, + "learning_rate": 1.931985246170947e-05, + "loss": 0.3968243896961212, + "step": 658 + }, + { + "epoch": 0.3257135796367231, + "grad_norm": 1.0416971268065567, + "learning_rate": 1.9316887542492645e-05, + "loss": 0.41183531284332275, + "step": 659 + }, + { + "epoch": 0.32620783393055724, + "grad_norm": 1.0367106782684, + "learning_rate": 1.931391640345482e-05, + "loss": 0.36057350039482117, + "step": 660 + }, + { + "epoch": 0.32670208822439145, + "grad_norm": 1.0663955736026025, + "learning_rate": 1.9310939046579482e-05, + "loss": 0.36032363772392273, + "step": 661 + }, + { + "epoch": 0.3271963425182256, + "grad_norm": 0.9657326304523917, + "learning_rate": 1.9307955473854275e-05, + "loss": 0.3682931363582611, + "step": 662 + }, + { + "epoch": 0.3276905968120598, + "grad_norm": 1.004896861978755, + "learning_rate": 1.9304965687270987e-05, + "loss": 0.3829198181629181, + "step": 663 + }, + { + "epoch": 0.32818485110589396, + 
"grad_norm": 1.0180253035605964, + "learning_rate": 1.930196968882556e-05, + "loss": 0.3901137709617615, + "step": 664 + }, + { + "epoch": 0.32867910539972817, + "grad_norm": 0.9037607838463562, + "learning_rate": 1.9298967480518077e-05, + "loss": 0.34352344274520874, + "step": 665 + }, + { + "epoch": 0.3291733596935623, + "grad_norm": 0.9918701152773953, + "learning_rate": 1.9295959064352767e-05, + "loss": 0.38822662830352783, + "step": 666 + }, + { + "epoch": 0.3296676139873965, + "grad_norm": 0.9619347095581623, + "learning_rate": 1.9292944442338013e-05, + "loss": 0.3639586567878723, + "step": 667 + }, + { + "epoch": 0.3301618682812307, + "grad_norm": 1.0248410702019595, + "learning_rate": 1.9289923616486326e-05, + "loss": 0.38537997007369995, + "step": 668 + }, + { + "epoch": 0.3306561225750649, + "grad_norm": 0.9469693142742907, + "learning_rate": 1.9286896588814373e-05, + "loss": 0.3514263331890106, + "step": 669 + }, + { + "epoch": 0.33115037686889903, + "grad_norm": 0.9776369401143131, + "learning_rate": 1.928386336134295e-05, + "loss": 0.3873803913593292, + "step": 670 + }, + { + "epoch": 0.33164463116273324, + "grad_norm": 1.0063829461952047, + "learning_rate": 1.9280823936096994e-05, + "loss": 0.36644282937049866, + "step": 671 + }, + { + "epoch": 0.3321388854565674, + "grad_norm": 0.8900960907324665, + "learning_rate": 1.9277778315105587e-05, + "loss": 0.34837427735328674, + "step": 672 + }, + { + "epoch": 0.3326331397504016, + "grad_norm": 1.0946494998655654, + "learning_rate": 1.927472650040194e-05, + "loss": 0.3879021406173706, + "step": 673 + }, + { + "epoch": 0.33312739404423575, + "grad_norm": 1.0256193203663788, + "learning_rate": 1.9271668494023404e-05, + "loss": 0.3753926753997803, + "step": 674 + }, + { + "epoch": 0.33362164833806995, + "grad_norm": 1.1193381317991955, + "learning_rate": 1.9268604298011454e-05, + "loss": 0.35362815856933594, + "step": 675 + }, + { + "epoch": 0.3341159026319041, + "grad_norm": 1.0612190451852097, + 
"learning_rate": 1.926553391441171e-05, + "loss": 0.3685564696788788, + "step": 676 + }, + { + "epoch": 0.3346101569257383, + "grad_norm": 1.2837359031878948, + "learning_rate": 1.926245734527391e-05, + "loss": 0.42326927185058594, + "step": 677 + }, + { + "epoch": 0.33510441121957246, + "grad_norm": 1.0247968871472715, + "learning_rate": 1.925937459265193e-05, + "loss": 0.35918861627578735, + "step": 678 + }, + { + "epoch": 0.33559866551340667, + "grad_norm": 1.1358099673309532, + "learning_rate": 1.9256285658603773e-05, + "loss": 0.38703471422195435, + "step": 679 + }, + { + "epoch": 0.3360929198072408, + "grad_norm": 1.0232813577835114, + "learning_rate": 1.9253190545191567e-05, + "loss": 0.3993009924888611, + "step": 680 + }, + { + "epoch": 0.336587174101075, + "grad_norm": 1.178587285681796, + "learning_rate": 1.9250089254481566e-05, + "loss": 0.3998498320579529, + "step": 681 + }, + { + "epoch": 0.3370814283949092, + "grad_norm": 1.0577657705862298, + "learning_rate": 1.9246981788544145e-05, + "loss": 0.37211501598358154, + "step": 682 + }, + { + "epoch": 0.3375756826887434, + "grad_norm": 1.0126592857393306, + "learning_rate": 1.9243868149453806e-05, + "loss": 0.37204745411872864, + "step": 683 + }, + { + "epoch": 0.33806993698257753, + "grad_norm": 0.9626025917248462, + "learning_rate": 1.924074833928917e-05, + "loss": 0.3784663677215576, + "step": 684 + }, + { + "epoch": 0.33856419127641174, + "grad_norm": 1.0085796667337208, + "learning_rate": 1.9237622360132975e-05, + "loss": 0.4140951633453369, + "step": 685 + }, + { + "epoch": 0.3390584455702459, + "grad_norm": 1.0251059918961796, + "learning_rate": 1.9234490214072083e-05, + "loss": 0.3723721504211426, + "step": 686 + }, + { + "epoch": 0.33955269986408004, + "grad_norm": 1.0704762953012439, + "learning_rate": 1.923135190319747e-05, + "loss": 0.3714251220226288, + "step": 687 + }, + { + "epoch": 0.34004695415791425, + "grad_norm": 2.286186750342226, + "learning_rate": 1.9228207429604224e-05, + "loss": 
0.3551461696624756, + "step": 688 + }, + { + "epoch": 0.3405412084517484, + "grad_norm": 1.0184392375158444, + "learning_rate": 1.9225056795391554e-05, + "loss": 0.3543378412723541, + "step": 689 + }, + { + "epoch": 0.3410354627455826, + "grad_norm": 0.9670805241747071, + "learning_rate": 1.922190000266278e-05, + "loss": 0.3405894935131073, + "step": 690 + }, + { + "epoch": 0.34152971703941676, + "grad_norm": 1.0375943311061684, + "learning_rate": 1.9218737053525324e-05, + "loss": 0.36478808522224426, + "step": 691 + }, + { + "epoch": 0.34202397133325096, + "grad_norm": 1.036881907490894, + "learning_rate": 1.9215567950090734e-05, + "loss": 0.39778709411621094, + "step": 692 + }, + { + "epoch": 0.3425182256270851, + "grad_norm": 0.9719804294561131, + "learning_rate": 1.9212392694474654e-05, + "loss": 0.3553788661956787, + "step": 693 + }, + { + "epoch": 0.3430124799209193, + "grad_norm": 1.0265620111261864, + "learning_rate": 1.920921128879684e-05, + "loss": 0.3393115997314453, + "step": 694 + }, + { + "epoch": 0.3435067342147535, + "grad_norm": 1.2003228723584403, + "learning_rate": 1.9206023735181154e-05, + "loss": 0.4240456819534302, + "step": 695 + }, + { + "epoch": 0.3440009885085877, + "grad_norm": 1.0687040296992496, + "learning_rate": 1.920283003575556e-05, + "loss": 0.3451164960861206, + "step": 696 + }, + { + "epoch": 0.34449524280242183, + "grad_norm": 1.0859108204006387, + "learning_rate": 1.919963019265213e-05, + "loss": 0.4328063726425171, + "step": 697 + }, + { + "epoch": 0.34498949709625604, + "grad_norm": 0.9953984300461581, + "learning_rate": 1.9196424208007026e-05, + "loss": 0.35965877771377563, + "step": 698 + }, + { + "epoch": 0.3454837513900902, + "grad_norm": 1.0276560460371096, + "learning_rate": 1.9193212083960522e-05, + "loss": 0.40995267033576965, + "step": 699 + }, + { + "epoch": 0.3459780056839244, + "grad_norm": 1.047717179086883, + "learning_rate": 1.9189993822656984e-05, + "loss": 0.373586505651474, + "step": 700 + }, + { + "epoch": 
0.34647225997775855, + "grad_norm": 0.967832395747722, + "learning_rate": 1.918676942624488e-05, + "loss": 0.3651657998561859, + "step": 701 + }, + { + "epoch": 0.34696651427159275, + "grad_norm": 0.9154206667420104, + "learning_rate": 1.918353889687677e-05, + "loss": 0.3333090543746948, + "step": 702 + }, + { + "epoch": 0.3474607685654269, + "grad_norm": 1.109347895406641, + "learning_rate": 1.9180302236709312e-05, + "loss": 0.444000780582428, + "step": 703 + }, + { + "epoch": 0.3479550228592611, + "grad_norm": 0.9543494832625998, + "learning_rate": 1.917705944790325e-05, + "loss": 0.34942537546157837, + "step": 704 + }, + { + "epoch": 0.34844927715309526, + "grad_norm": 1.206317081042567, + "learning_rate": 1.9173810532623425e-05, + "loss": 0.4709789752960205, + "step": 705 + }, + { + "epoch": 0.34894353144692947, + "grad_norm": 1.0126287373930702, + "learning_rate": 1.917055549303877e-05, + "loss": 0.3615723252296448, + "step": 706 + }, + { + "epoch": 0.3494377857407636, + "grad_norm": 0.98553805717422, + "learning_rate": 1.9167294331322293e-05, + "loss": 0.366035133600235, + "step": 707 + }, + { + "epoch": 0.3499320400345978, + "grad_norm": 1.085095649211616, + "learning_rate": 1.9164027049651105e-05, + "loss": 0.3916548490524292, + "step": 708 + }, + { + "epoch": 0.350426294328432, + "grad_norm": 1.0423550617328055, + "learning_rate": 1.91607536502064e-05, + "loss": 0.3752925992012024, + "step": 709 + }, + { + "epoch": 0.3509205486222662, + "grad_norm": 1.0859051595052658, + "learning_rate": 1.9157474135173448e-05, + "loss": 0.3471261262893677, + "step": 710 + }, + { + "epoch": 0.35141480291610033, + "grad_norm": 1.0011428490015388, + "learning_rate": 1.9154188506741605e-05, + "loss": 0.36898115277290344, + "step": 711 + }, + { + "epoch": 0.3519090572099345, + "grad_norm": 1.0150877470647623, + "learning_rate": 1.9150896767104315e-05, + "loss": 0.38236287236213684, + "step": 712 + }, + { + "epoch": 0.3524033115037687, + "grad_norm": 1.0813644645593066, + 
"learning_rate": 1.9147598918459096e-05, + "loss": 0.39260241389274597, + "step": 713 + }, + { + "epoch": 0.35289756579760284, + "grad_norm": 0.988095993083205, + "learning_rate": 1.9144294963007542e-05, + "loss": 0.3699083626270294, + "step": 714 + }, + { + "epoch": 0.35339182009143705, + "grad_norm": 0.9649609380548236, + "learning_rate": 1.914098490295532e-05, + "loss": 0.37720543146133423, + "step": 715 + }, + { + "epoch": 0.3538860743852712, + "grad_norm": 0.8834082509396699, + "learning_rate": 1.9137668740512195e-05, + "loss": 0.298441082239151, + "step": 716 + }, + { + "epoch": 0.3543803286791054, + "grad_norm": 0.996165149875045, + "learning_rate": 1.913434647789197e-05, + "loss": 0.3867550194263458, + "step": 717 + }, + { + "epoch": 0.35487458297293956, + "grad_norm": 0.9824732772890364, + "learning_rate": 1.913101811731256e-05, + "loss": 0.37111300230026245, + "step": 718 + }, + { + "epoch": 0.35536883726677376, + "grad_norm": 0.9874274570055057, + "learning_rate": 1.9127683660995916e-05, + "loss": 0.3922812342643738, + "step": 719 + }, + { + "epoch": 0.3558630915606079, + "grad_norm": 1.0744489462576237, + "learning_rate": 1.9124343111168077e-05, + "loss": 0.3878915309906006, + "step": 720 + }, + { + "epoch": 0.3563573458544421, + "grad_norm": 0.9551023310729483, + "learning_rate": 1.9120996470059153e-05, + "loss": 0.34974879026412964, + "step": 721 + }, + { + "epoch": 0.35685160014827627, + "grad_norm": 1.0403250728390605, + "learning_rate": 1.9117643739903306e-05, + "loss": 0.38341426849365234, + "step": 722 + }, + { + "epoch": 0.3573458544421105, + "grad_norm": 0.9876921724558848, + "learning_rate": 1.9114284922938772e-05, + "loss": 0.32610252499580383, + "step": 723 + }, + { + "epoch": 0.35784010873594463, + "grad_norm": 1.0486464385186933, + "learning_rate": 1.9110920021407855e-05, + "loss": 0.37203550338745117, + "step": 724 + }, + { + "epoch": 0.35833436302977884, + "grad_norm": 1.0809240289061282, + "learning_rate": 1.9107549037556906e-05, + 
"loss": 0.2954786419868469, + "step": 725 + }, + { + "epoch": 0.358828617323613, + "grad_norm": 0.9795897601711951, + "learning_rate": 1.9104171973636353e-05, + "loss": 0.33074450492858887, + "step": 726 + }, + { + "epoch": 0.3593228716174472, + "grad_norm": 1.0341587070514209, + "learning_rate": 1.9100788831900676e-05, + "loss": 0.350687712430954, + "step": 727 + }, + { + "epoch": 0.35981712591128134, + "grad_norm": 1.143909518582956, + "learning_rate": 1.9097399614608406e-05, + "loss": 0.3635619878768921, + "step": 728 + }, + { + "epoch": 0.36031138020511555, + "grad_norm": 1.0607740871884148, + "learning_rate": 1.909400432402214e-05, + "loss": 0.36409544944763184, + "step": 729 + }, + { + "epoch": 0.3608056344989497, + "grad_norm": 1.069313873032721, + "learning_rate": 1.9090602962408523e-05, + "loss": 0.4109501540660858, + "step": 730 + }, + { + "epoch": 0.3612998887927839, + "grad_norm": 1.0147750628685799, + "learning_rate": 1.908719553203826e-05, + "loss": 0.337943971157074, + "step": 731 + }, + { + "epoch": 0.36179414308661806, + "grad_norm": 1.0957860180414656, + "learning_rate": 1.9083782035186097e-05, + "loss": 0.36411553621292114, + "step": 732 + }, + { + "epoch": 0.36228839738045227, + "grad_norm": 1.1570738944902594, + "learning_rate": 1.908036247413084e-05, + "loss": 0.3513786494731903, + "step": 733 + }, + { + "epoch": 0.3627826516742864, + "grad_norm": 1.156885907892102, + "learning_rate": 1.907693685115534e-05, + "loss": 0.4017047584056854, + "step": 734 + }, + { + "epoch": 0.3632769059681206, + "grad_norm": 1.0932284273900412, + "learning_rate": 1.907350516854649e-05, + "loss": 0.3780835270881653, + "step": 735 + }, + { + "epoch": 0.3637711602619548, + "grad_norm": 1.10688269569213, + "learning_rate": 1.9070067428595234e-05, + "loss": 0.35562777519226074, + "step": 736 + }, + { + "epoch": 0.364265414555789, + "grad_norm": 1.0784034928358046, + "learning_rate": 1.9066623633596556e-05, + "loss": 0.34880492091178894, + "step": 737 + }, + { + 
"epoch": 0.36475966884962313, + "grad_norm": 1.1213824671894879, + "learning_rate": 1.9063173785849488e-05, + "loss": 0.3798677921295166, + "step": 738 + }, + { + "epoch": 0.3652539231434573, + "grad_norm": 1.0300538330170659, + "learning_rate": 1.9059717887657098e-05, + "loss": 0.371119886636734, + "step": 739 + }, + { + "epoch": 0.3657481774372915, + "grad_norm": 1.075537593372937, + "learning_rate": 1.9056255941326497e-05, + "loss": 0.3845891058444977, + "step": 740 + }, + { + "epoch": 0.36624243173112564, + "grad_norm": 1.0460904589757556, + "learning_rate": 1.9052787949168823e-05, + "loss": 0.34627166390419006, + "step": 741 + }, + { + "epoch": 0.36673668602495985, + "grad_norm": 1.0588032623720978, + "learning_rate": 1.9049313913499266e-05, + "loss": 0.3872081935405731, + "step": 742 + }, + { + "epoch": 0.367230940318794, + "grad_norm": 1.0173727289332204, + "learning_rate": 1.9045833836637038e-05, + "loss": 0.40446269512176514, + "step": 743 + }, + { + "epoch": 0.3677251946126282, + "grad_norm": 0.9672045860873493, + "learning_rate": 1.904234772090539e-05, + "loss": 0.3421085476875305, + "step": 744 + }, + { + "epoch": 0.36821944890646235, + "grad_norm": 0.9886363928023795, + "learning_rate": 1.90388555686316e-05, + "loss": 0.3626730442047119, + "step": 745 + }, + { + "epoch": 0.36871370320029656, + "grad_norm": 0.9308335236520315, + "learning_rate": 1.9035357382146984e-05, + "loss": 0.338506281375885, + "step": 746 + }, + { + "epoch": 0.3692079574941307, + "grad_norm": 1.010277605498289, + "learning_rate": 1.903185316378688e-05, + "loss": 0.3709959089756012, + "step": 747 + }, + { + "epoch": 0.3697022117879649, + "grad_norm": 1.0369282663858728, + "learning_rate": 1.9028342915890655e-05, + "loss": 0.3804059624671936, + "step": 748 + }, + { + "epoch": 0.37019646608179907, + "grad_norm": 1.0305613800678137, + "learning_rate": 1.9024826640801694e-05, + "loss": 0.3416539132595062, + "step": 749 + }, + { + "epoch": 0.3706907203756333, + "grad_norm": 
1.0119233680399335, + "learning_rate": 1.9021304340867418e-05, + "loss": 0.3642072081565857, + "step": 750 + }, + { + "epoch": 0.3711849746694674, + "grad_norm": 0.9749783281253589, + "learning_rate": 1.9017776018439267e-05, + "loss": 0.35957199335098267, + "step": 751 + }, + { + "epoch": 0.37167922896330163, + "grad_norm": 1.1539382067501942, + "learning_rate": 1.9014241675872692e-05, + "loss": 0.38497287034988403, + "step": 752 + }, + { + "epoch": 0.3721734832571358, + "grad_norm": 1.1731793747690833, + "learning_rate": 1.9010701315527173e-05, + "loss": 0.40713614225387573, + "step": 753 + }, + { + "epoch": 0.37266773755097, + "grad_norm": 1.0417857344342851, + "learning_rate": 1.9007154939766196e-05, + "loss": 0.35115551948547363, + "step": 754 + }, + { + "epoch": 0.37316199184480414, + "grad_norm": 0.958988647508799, + "learning_rate": 1.9003602550957284e-05, + "loss": 0.3478096127510071, + "step": 755 + }, + { + "epoch": 0.37365624613863835, + "grad_norm": 1.040896998789985, + "learning_rate": 1.9000044151471956e-05, + "loss": 0.36460641026496887, + "step": 756 + }, + { + "epoch": 0.3741505004324725, + "grad_norm": 1.1161707385765272, + "learning_rate": 1.8996479743685745e-05, + "loss": 0.38015758991241455, + "step": 757 + }, + { + "epoch": 0.3746447547263067, + "grad_norm": 1.1039269634713542, + "learning_rate": 1.8992909329978202e-05, + "loss": 0.35270214080810547, + "step": 758 + }, + { + "epoch": 0.37513900902014086, + "grad_norm": 1.0025131869881447, + "learning_rate": 1.8989332912732884e-05, + "loss": 0.3875473439693451, + "step": 759 + }, + { + "epoch": 0.37563326331397506, + "grad_norm": 1.0209812095079043, + "learning_rate": 1.8985750494337353e-05, + "loss": 0.3281819820404053, + "step": 760 + }, + { + "epoch": 0.3761275176078092, + "grad_norm": 1.2490133288735825, + "learning_rate": 1.8982162077183182e-05, + "loss": 0.4081311821937561, + "step": 761 + }, + { + "epoch": 0.3766217719016434, + "grad_norm": 1.2134865751354402, + "learning_rate": 
1.897856766366595e-05, + "loss": 0.3546852469444275, + "step": 762 + }, + { + "epoch": 0.37711602619547757, + "grad_norm": 0.9620958606777789, + "learning_rate": 1.8974967256185234e-05, + "loss": 0.3177235424518585, + "step": 763 + }, + { + "epoch": 0.3776102804893118, + "grad_norm": 1.0401218813843935, + "learning_rate": 1.8971360857144616e-05, + "loss": 0.3739625811576843, + "step": 764 + }, + { + "epoch": 0.37810453478314593, + "grad_norm": 0.9714277368627854, + "learning_rate": 1.8967748468951673e-05, + "loss": 0.32039010524749756, + "step": 765 + }, + { + "epoch": 0.3785987890769801, + "grad_norm": 1.0178844258047104, + "learning_rate": 1.8964130094017986e-05, + "loss": 0.3237234354019165, + "step": 766 + }, + { + "epoch": 0.3790930433708143, + "grad_norm": 1.0589536664735313, + "learning_rate": 1.896050573475913e-05, + "loss": 0.33864307403564453, + "step": 767 + }, + { + "epoch": 0.37958729766464844, + "grad_norm": 1.076259010215984, + "learning_rate": 1.8956875393594675e-05, + "loss": 0.40412086248397827, + "step": 768 + }, + { + "epoch": 0.38008155195848264, + "grad_norm": 1.049114130745209, + "learning_rate": 1.8953239072948185e-05, + "loss": 0.37689530849456787, + "step": 769 + }, + { + "epoch": 0.3805758062523168, + "grad_norm": 1.1429748380406861, + "learning_rate": 1.8949596775247215e-05, + "loss": 0.3632664680480957, + "step": 770 + }, + { + "epoch": 0.381070060546151, + "grad_norm": 1.0707340379824546, + "learning_rate": 1.8945948502923314e-05, + "loss": 0.384027361869812, + "step": 771 + }, + { + "epoch": 0.38156431483998515, + "grad_norm": 1.0884709757767692, + "learning_rate": 1.8942294258412012e-05, + "loss": 0.37623292207717896, + "step": 772 + }, + { + "epoch": 0.38205856913381936, + "grad_norm": 0.9918916696644151, + "learning_rate": 1.8938634044152837e-05, + "loss": 0.3449557423591614, + "step": 773 + }, + { + "epoch": 0.3825528234276535, + "grad_norm": 1.0216495444427651, + "learning_rate": 1.8934967862589287e-05, + "loss": 
0.37977170944213867, + "step": 774 + }, + { + "epoch": 0.3830470777214877, + "grad_norm": 1.035626875821766, + "learning_rate": 1.893129571616886e-05, + "loss": 0.3535463809967041, + "step": 775 + }, + { + "epoch": 0.38354133201532187, + "grad_norm": 0.9784961361645077, + "learning_rate": 1.8927617607343024e-05, + "loss": 0.3107556104660034, + "step": 776 + }, + { + "epoch": 0.3840355863091561, + "grad_norm": 0.9647734455274504, + "learning_rate": 1.8923933538567238e-05, + "loss": 0.33028605580329895, + "step": 777 + }, + { + "epoch": 0.3845298406029902, + "grad_norm": 1.0880250729774004, + "learning_rate": 1.8920243512300925e-05, + "loss": 0.35947421193122864, + "step": 778 + }, + { + "epoch": 0.38502409489682443, + "grad_norm": 1.1225656593555045, + "learning_rate": 1.89165475310075e-05, + "loss": 0.36262935400009155, + "step": 779 + }, + { + "epoch": 0.3855183491906586, + "grad_norm": 0.9595574558826961, + "learning_rate": 1.8912845597154344e-05, + "loss": 0.3441828489303589, + "step": 780 + }, + { + "epoch": 0.3860126034844928, + "grad_norm": 1.1060761912194574, + "learning_rate": 1.8909137713212813e-05, + "loss": 0.3748928904533386, + "step": 781 + }, + { + "epoch": 0.38650685777832694, + "grad_norm": 1.0401989681427097, + "learning_rate": 1.8905423881658248e-05, + "loss": 0.3571966588497162, + "step": 782 + }, + { + "epoch": 0.38700111207216115, + "grad_norm": 1.0661600684644588, + "learning_rate": 1.8901704104969937e-05, + "loss": 0.3937920331954956, + "step": 783 + }, + { + "epoch": 0.3874953663659953, + "grad_norm": 1.036207969764135, + "learning_rate": 1.8897978385631157e-05, + "loss": 0.3641708493232727, + "step": 784 + }, + { + "epoch": 0.3879896206598295, + "grad_norm": 1.0259735566777997, + "learning_rate": 1.8894246726129143e-05, + "loss": 0.33510833978652954, + "step": 785 + }, + { + "epoch": 0.38848387495366365, + "grad_norm": 1.0496886995032506, + "learning_rate": 1.88905091289551e-05, + "loss": 0.3553236722946167, + "step": 786 + }, + { + 
"epoch": 0.38897812924749786, + "grad_norm": 1.1065055000350301, + "learning_rate": 1.8886765596604188e-05, + "loss": 0.3802195191383362, + "step": 787 + }, + { + "epoch": 0.389472383541332, + "grad_norm": 1.0233155379560877, + "learning_rate": 1.8883016131575546e-05, + "loss": 0.3672805726528168, + "step": 788 + }, + { + "epoch": 0.3899666378351662, + "grad_norm": 1.1021600101810725, + "learning_rate": 1.887926073637225e-05, + "loss": 0.35715609788894653, + "step": 789 + }, + { + "epoch": 0.39046089212900037, + "grad_norm": 1.0669470229074853, + "learning_rate": 1.8875499413501362e-05, + "loss": 0.3800659775733948, + "step": 790 + }, + { + "epoch": 0.3909551464228345, + "grad_norm": 1.0110531011706714, + "learning_rate": 1.8871732165473878e-05, + "loss": 0.36886462569236755, + "step": 791 + }, + { + "epoch": 0.3914494007166687, + "grad_norm": 1.1716485087298352, + "learning_rate": 1.886795899480476e-05, + "loss": 0.37373536825180054, + "step": 792 + }, + { + "epoch": 0.3919436550105029, + "grad_norm": 1.1804493539453536, + "learning_rate": 1.8864179904012932e-05, + "loss": 0.4016551375389099, + "step": 793 + }, + { + "epoch": 0.3924379093043371, + "grad_norm": 1.3227573763511704, + "learning_rate": 1.886039489562125e-05, + "loss": 0.35107535123825073, + "step": 794 + }, + { + "epoch": 0.39293216359817124, + "grad_norm": 1.0690764214154878, + "learning_rate": 1.8856603972156532e-05, + "loss": 0.36280331015586853, + "step": 795 + }, + { + "epoch": 0.39342641789200544, + "grad_norm": 1.0372650355149657, + "learning_rate": 1.885280713614955e-05, + "loss": 0.3417884111404419, + "step": 796 + }, + { + "epoch": 0.3939206721858396, + "grad_norm": 1.0135638633522712, + "learning_rate": 1.8849004390135017e-05, + "loss": 0.3257544934749603, + "step": 797 + }, + { + "epoch": 0.3944149264796738, + "grad_norm": 1.138312578356034, + "learning_rate": 1.8845195736651588e-05, + "loss": 0.3694860339164734, + "step": 798 + }, + { + "epoch": 0.39490918077350795, + "grad_norm": 
1.0432466517484986, + "learning_rate": 1.8841381178241865e-05, + "loss": 0.37279266119003296, + "step": 799 + }, + { + "epoch": 0.39540343506734216, + "grad_norm": 1.023281980764518, + "learning_rate": 1.88375607174524e-05, + "loss": 0.38758352398872375, + "step": 800 + }, + { + "epoch": 0.3958976893611763, + "grad_norm": 1.0321652923702807, + "learning_rate": 1.883373435683367e-05, + "loss": 0.34098950028419495, + "step": 801 + }, + { + "epoch": 0.3963919436550105, + "grad_norm": 1.0256865325574602, + "learning_rate": 1.8829902098940105e-05, + "loss": 0.3278653621673584, + "step": 802 + }, + { + "epoch": 0.39688619794884467, + "grad_norm": 1.1042531688452888, + "learning_rate": 1.8826063946330065e-05, + "loss": 0.3673133850097656, + "step": 803 + }, + { + "epoch": 0.39738045224267887, + "grad_norm": 0.9510108180701087, + "learning_rate": 1.882221990156584e-05, + "loss": 0.37917453050613403, + "step": 804 + }, + { + "epoch": 0.397874706536513, + "grad_norm": 0.9926574292369763, + "learning_rate": 1.8818369967213662e-05, + "loss": 0.33986327052116394, + "step": 805 + }, + { + "epoch": 0.39836896083034723, + "grad_norm": 1.0256369099360807, + "learning_rate": 1.8814514145843694e-05, + "loss": 0.34402647614479065, + "step": 806 + }, + { + "epoch": 0.3988632151241814, + "grad_norm": 1.0984836868071073, + "learning_rate": 1.8810652440030026e-05, + "loss": 0.32781803607940674, + "step": 807 + }, + { + "epoch": 0.3993574694180156, + "grad_norm": 1.063630501097469, + "learning_rate": 1.8806784852350678e-05, + "loss": 0.35807961225509644, + "step": 808 + }, + { + "epoch": 0.39985172371184974, + "grad_norm": 1.0130076092125457, + "learning_rate": 1.8802911385387596e-05, + "loss": 0.33577096462249756, + "step": 809 + }, + { + "epoch": 0.40034597800568394, + "grad_norm": 1.205635135602797, + "learning_rate": 1.8799032041726654e-05, + "loss": 0.37786391377449036, + "step": 810 + }, + { + "epoch": 0.4008402322995181, + "grad_norm": 1.0055899694647235, + "learning_rate": 
1.879514682395764e-05, + "loss": 0.3237725496292114, + "step": 811 + }, + { + "epoch": 0.4013344865933523, + "grad_norm": 1.0557641796624602, + "learning_rate": 1.8791255734674275e-05, + "loss": 0.29552844166755676, + "step": 812 + }, + { + "epoch": 0.40182874088718645, + "grad_norm": 1.0675222800328668, + "learning_rate": 1.8787358776474192e-05, + "loss": 0.40317612886428833, + "step": 813 + }, + { + "epoch": 0.40232299518102066, + "grad_norm": 1.0548023053217102, + "learning_rate": 1.8783455951958948e-05, + "loss": 0.33383694291114807, + "step": 814 + }, + { + "epoch": 0.4028172494748548, + "grad_norm": 1.0255061005640398, + "learning_rate": 1.8779547263734012e-05, + "loss": 0.35020262002944946, + "step": 815 + }, + { + "epoch": 0.403311503768689, + "grad_norm": 1.098709822155027, + "learning_rate": 1.8775632714408765e-05, + "loss": 0.3742774724960327, + "step": 816 + }, + { + "epoch": 0.40380575806252317, + "grad_norm": 0.9986084839363315, + "learning_rate": 1.8771712306596506e-05, + "loss": 0.35037580132484436, + "step": 817 + }, + { + "epoch": 0.4043000123563573, + "grad_norm": 1.078218018297503, + "learning_rate": 1.8767786042914445e-05, + "loss": 0.3416820168495178, + "step": 818 + }, + { + "epoch": 0.4047942666501915, + "grad_norm": 1.0398523365943921, + "learning_rate": 1.8763853925983695e-05, + "loss": 0.33287927508354187, + "step": 819 + }, + { + "epoch": 0.4052885209440257, + "grad_norm": 1.031774367057856, + "learning_rate": 1.875991595842929e-05, + "loss": 0.3493141531944275, + "step": 820 + }, + { + "epoch": 0.4057827752378599, + "grad_norm": 1.1647269737420223, + "learning_rate": 1.875597214288015e-05, + "loss": 0.4184780418872833, + "step": 821 + }, + { + "epoch": 0.40627702953169403, + "grad_norm": 1.0098974718957208, + "learning_rate": 1.8752022481969116e-05, + "loss": 0.33189794421195984, + "step": 822 + }, + { + "epoch": 0.40677128382552824, + "grad_norm": 1.1012026040533913, + "learning_rate": 1.8748066978332925e-05, + "loss": 
0.35339856147766113, + "step": 823 + }, + { + "epoch": 0.4072655381193624, + "grad_norm": 0.990995886573267, + "learning_rate": 1.874410563461221e-05, + "loss": 0.3766328692436218, + "step": 824 + }, + { + "epoch": 0.4077597924131966, + "grad_norm": 1.023451056136873, + "learning_rate": 1.874013845345152e-05, + "loss": 0.32575076818466187, + "step": 825 + }, + { + "epoch": 0.40825404670703075, + "grad_norm": 0.9933822197860499, + "learning_rate": 1.8736165437499273e-05, + "loss": 0.3417864441871643, + "step": 826 + }, + { + "epoch": 0.40874830100086496, + "grad_norm": 1.053854919420327, + "learning_rate": 1.8732186589407807e-05, + "loss": 0.3636544942855835, + "step": 827 + }, + { + "epoch": 0.4092425552946991, + "grad_norm": 1.0398605740994966, + "learning_rate": 1.872820191183334e-05, + "loss": 0.38730406761169434, + "step": 828 + }, + { + "epoch": 0.4097368095885333, + "grad_norm": 1.031894160648423, + "learning_rate": 1.872421140743599e-05, + "loss": 0.3593043088912964, + "step": 829 + }, + { + "epoch": 0.41023106388236746, + "grad_norm": 1.046860972263581, + "learning_rate": 1.872021507887976e-05, + "loss": 0.39092978835105896, + "step": 830 + }, + { + "epoch": 0.41072531817620167, + "grad_norm": 1.1607362555786684, + "learning_rate": 1.8716212928832537e-05, + "loss": 0.3745616674423218, + "step": 831 + }, + { + "epoch": 0.4112195724700358, + "grad_norm": 1.1451994826740608, + "learning_rate": 1.87122049599661e-05, + "loss": 0.39571845531463623, + "step": 832 + }, + { + "epoch": 0.41171382676387, + "grad_norm": 1.0987542615004384, + "learning_rate": 1.8708191174956116e-05, + "loss": 0.35459476709365845, + "step": 833 + }, + { + "epoch": 0.4122080810577042, + "grad_norm": 1.1159636372579822, + "learning_rate": 1.870417157648213e-05, + "loss": 0.38937896490097046, + "step": 834 + }, + { + "epoch": 0.4127023353515384, + "grad_norm": 1.002441779942121, + "learning_rate": 1.8700146167227563e-05, + "loss": 0.33595120906829834, + "step": 835 + }, + { + "epoch": 
0.41319658964537254, + "grad_norm": 0.9899088387295479, + "learning_rate": 1.869611494987973e-05, + "loss": 0.332889199256897, + "step": 836 + }, + { + "epoch": 0.41369084393920674, + "grad_norm": 1.0005984941908395, + "learning_rate": 1.8692077927129803e-05, + "loss": 0.333438515663147, + "step": 837 + }, + { + "epoch": 0.4141850982330409, + "grad_norm": 0.9672990037342486, + "learning_rate": 1.868803510167285e-05, + "loss": 0.30645743012428284, + "step": 838 + }, + { + "epoch": 0.4146793525268751, + "grad_norm": 1.0166404987540014, + "learning_rate": 1.86839864762078e-05, + "loss": 0.3333967924118042, + "step": 839 + }, + { + "epoch": 0.41517360682070925, + "grad_norm": 1.1324675944020866, + "learning_rate": 1.867993205343746e-05, + "loss": 0.36230576038360596, + "step": 840 + }, + { + "epoch": 0.41566786111454346, + "grad_norm": 1.4565152055506116, + "learning_rate": 1.8675871836068498e-05, + "loss": 0.34191709756851196, + "step": 841 + }, + { + "epoch": 0.4161621154083776, + "grad_norm": 1.1876819294674656, + "learning_rate": 1.8671805826811462e-05, + "loss": 0.3115188479423523, + "step": 842 + }, + { + "epoch": 0.4166563697022118, + "grad_norm": 1.023080563524472, + "learning_rate": 1.866773402838076e-05, + "loss": 0.3725768029689789, + "step": 843 + }, + { + "epoch": 0.41715062399604597, + "grad_norm": 1.1051799194693688, + "learning_rate": 1.8663656443494673e-05, + "loss": 0.376983642578125, + "step": 844 + }, + { + "epoch": 0.4176448782898801, + "grad_norm": 1.0101343157113072, + "learning_rate": 1.8659573074875327e-05, + "loss": 0.31490784883499146, + "step": 845 + }, + { + "epoch": 0.4181391325837143, + "grad_norm": 1.0250002510666845, + "learning_rate": 1.8655483925248727e-05, + "loss": 0.3533504605293274, + "step": 846 + }, + { + "epoch": 0.4186333868775485, + "grad_norm": 1.090746715781531, + "learning_rate": 1.8651388997344734e-05, + "loss": 0.3282274305820465, + "step": 847 + }, + { + "epoch": 0.4191276411713827, + "grad_norm": 1.1145704933282803, + 
"learning_rate": 1.8647288293897055e-05, + "loss": 0.32892414927482605, + "step": 848 + }, + { + "epoch": 0.41962189546521683, + "grad_norm": 1.1451436882679205, + "learning_rate": 1.864318181764327e-05, + "loss": 0.40414246916770935, + "step": 849 + }, + { + "epoch": 0.42011614975905104, + "grad_norm": 0.9874933781402742, + "learning_rate": 1.8639069571324798e-05, + "loss": 0.30335378646850586, + "step": 850 + }, + { + "epoch": 0.4206104040528852, + "grad_norm": 1.0390790492756226, + "learning_rate": 1.863495155768692e-05, + "loss": 0.311710000038147, + "step": 851 + }, + { + "epoch": 0.4211046583467194, + "grad_norm": 1.1685121542837038, + "learning_rate": 1.8630827779478755e-05, + "loss": 0.37345218658447266, + "step": 852 + }, + { + "epoch": 0.42159891264055355, + "grad_norm": 1.118375459884757, + "learning_rate": 1.8626698239453287e-05, + "loss": 0.37286317348480225, + "step": 853 + }, + { + "epoch": 0.42209316693438775, + "grad_norm": 1.061435107804804, + "learning_rate": 1.8622562940367335e-05, + "loss": 0.3706691861152649, + "step": 854 + }, + { + "epoch": 0.4225874212282219, + "grad_norm": 1.045639661440086, + "learning_rate": 1.8618421884981567e-05, + "loss": 0.30183354020118713, + "step": 855 + }, + { + "epoch": 0.4230816755220561, + "grad_norm": 0.9282918926966607, + "learning_rate": 1.8614275076060486e-05, + "loss": 0.32329827547073364, + "step": 856 + }, + { + "epoch": 0.42357592981589026, + "grad_norm": 0.9823332197669685, + "learning_rate": 1.861012251637245e-05, + "loss": 0.39380010962486267, + "step": 857 + }, + { + "epoch": 0.42407018410972447, + "grad_norm": 1.2258684110272524, + "learning_rate": 1.8605964208689646e-05, + "loss": 0.41745316982269287, + "step": 858 + }, + { + "epoch": 0.4245644384035586, + "grad_norm": 1.0539643629085786, + "learning_rate": 1.86018001557881e-05, + "loss": 0.36751389503479004, + "step": 859 + }, + { + "epoch": 0.4250586926973928, + "grad_norm": 1.052378043397748, + "learning_rate": 1.8597630360447673e-05, + 
"loss": 0.36876100301742554, + "step": 860 + }, + { + "epoch": 0.425552946991227, + "grad_norm": 1.0649813734142937, + "learning_rate": 1.8593454825452067e-05, + "loss": 0.3473365306854248, + "step": 861 + }, + { + "epoch": 0.4260472012850612, + "grad_norm": 1.0186749062796028, + "learning_rate": 1.8589273553588802e-05, + "loss": 0.3429828882217407, + "step": 862 + }, + { + "epoch": 0.42654145557889533, + "grad_norm": 0.9471164855143414, + "learning_rate": 1.8585086547649238e-05, + "loss": 0.3424219787120819, + "step": 863 + }, + { + "epoch": 0.42703570987272954, + "grad_norm": 1.002345729786534, + "learning_rate": 1.8580893810428562e-05, + "loss": 0.32187891006469727, + "step": 864 + }, + { + "epoch": 0.4275299641665637, + "grad_norm": 0.997893238522563, + "learning_rate": 1.8576695344725785e-05, + "loss": 0.3116072416305542, + "step": 865 + }, + { + "epoch": 0.4280242184603979, + "grad_norm": 0.9198063604105835, + "learning_rate": 1.8572491153343742e-05, + "loss": 0.32645124197006226, + "step": 866 + }, + { + "epoch": 0.42851847275423205, + "grad_norm": 1.0827892730720303, + "learning_rate": 1.8568281239089088e-05, + "loss": 0.36861616373062134, + "step": 867 + }, + { + "epoch": 0.42901272704806626, + "grad_norm": 1.05561333743087, + "learning_rate": 1.8564065604772307e-05, + "loss": 0.38477885723114014, + "step": 868 + }, + { + "epoch": 0.4295069813419004, + "grad_norm": 1.1711610330815532, + "learning_rate": 1.8559844253207694e-05, + "loss": 0.352588951587677, + "step": 869 + }, + { + "epoch": 0.43000123563573456, + "grad_norm": 1.1459489566657088, + "learning_rate": 1.8555617187213362e-05, + "loss": 0.43443864583969116, + "step": 870 + }, + { + "epoch": 0.43049548992956876, + "grad_norm": 1.1608032541581428, + "learning_rate": 1.8551384409611238e-05, + "loss": 0.37355685234069824, + "step": 871 + }, + { + "epoch": 0.4309897442234029, + "grad_norm": 1.120838755410591, + "learning_rate": 1.854714592322707e-05, + "loss": 0.3529026508331299, + "step": 872 + }, + { 
+ "epoch": 0.4314839985172371, + "grad_norm": 1.031744932760461, + "learning_rate": 1.854290173089041e-05, + "loss": 0.3278823494911194, + "step": 873 + }, + { + "epoch": 0.4319782528110713, + "grad_norm": 1.045846838310407, + "learning_rate": 1.8538651835434615e-05, + "loss": 0.3677588999271393, + "step": 874 + }, + { + "epoch": 0.4324725071049055, + "grad_norm": 0.9726822011565114, + "learning_rate": 1.8534396239696852e-05, + "loss": 0.34132176637649536, + "step": 875 + }, + { + "epoch": 0.43296676139873963, + "grad_norm": 0.967842291132869, + "learning_rate": 1.8530134946518106e-05, + "loss": 0.3329963684082031, + "step": 876 + }, + { + "epoch": 0.43346101569257384, + "grad_norm": 1.1447169522915757, + "learning_rate": 1.852586795874315e-05, + "loss": 0.38435080647468567, + "step": 877 + }, + { + "epoch": 0.433955269986408, + "grad_norm": 1.076068410050275, + "learning_rate": 1.8521595279220564e-05, + "loss": 0.3737541735172272, + "step": 878 + }, + { + "epoch": 0.4344495242802422, + "grad_norm": 1.0947429210573731, + "learning_rate": 1.851731691080273e-05, + "loss": 0.3676382303237915, + "step": 879 + }, + { + "epoch": 0.43494377857407635, + "grad_norm": 0.9624268111771948, + "learning_rate": 1.8513032856345825e-05, + "loss": 0.317960262298584, + "step": 880 + }, + { + "epoch": 0.43543803286791055, + "grad_norm": 1.040958800557315, + "learning_rate": 1.8508743118709816e-05, + "loss": 0.38857966661453247, + "step": 881 + }, + { + "epoch": 0.4359322871617447, + "grad_norm": 1.0694529449199925, + "learning_rate": 1.8504447700758482e-05, + "loss": 0.33234506845474243, + "step": 882 + }, + { + "epoch": 0.4364265414555789, + "grad_norm": 1.0262098516685678, + "learning_rate": 1.8500146605359375e-05, + "loss": 0.3380611538887024, + "step": 883 + }, + { + "epoch": 0.43692079574941306, + "grad_norm": 1.032922511494617, + "learning_rate": 1.8495839835383845e-05, + "loss": 0.36386823654174805, + "step": 884 + }, + { + "epoch": 0.43741505004324727, + "grad_norm": 
1.0814661245803954, + "learning_rate": 1.849152739370703e-05, + "loss": 0.34711897373199463, + "step": 885 + }, + { + "epoch": 0.4379093043370814, + "grad_norm": 1.1112439466083954, + "learning_rate": 1.848720928320786e-05, + "loss": 0.3861457109451294, + "step": 886 + }, + { + "epoch": 0.4384035586309156, + "grad_norm": 1.0062524071684966, + "learning_rate": 1.848288550676904e-05, + "loss": 0.3387115001678467, + "step": 887 + }, + { + "epoch": 0.4388978129247498, + "grad_norm": 1.119801920916648, + "learning_rate": 1.847855606727706e-05, + "loss": 0.3419748842716217, + "step": 888 + }, + { + "epoch": 0.439392067218584, + "grad_norm": 1.1162084355940824, + "learning_rate": 1.847422096762219e-05, + "loss": 0.38184499740600586, + "step": 889 + }, + { + "epoch": 0.43988632151241813, + "grad_norm": 1.1974191241625343, + "learning_rate": 1.846988021069849e-05, + "loss": 0.3845345973968506, + "step": 890 + }, + { + "epoch": 0.44038057580625234, + "grad_norm": 1.035257767207683, + "learning_rate": 1.8465533799403778e-05, + "loss": 0.31854647397994995, + "step": 891 + }, + { + "epoch": 0.4408748301000865, + "grad_norm": 1.2150547461116588, + "learning_rate": 1.8461181736639658e-05, + "loss": 0.3940027356147766, + "step": 892 + }, + { + "epoch": 0.4413690843939207, + "grad_norm": 1.0827124100419134, + "learning_rate": 1.8456824025311508e-05, + "loss": 0.3580612540245056, + "step": 893 + }, + { + "epoch": 0.44186333868775485, + "grad_norm": 1.0457692243819372, + "learning_rate": 1.8452460668328474e-05, + "loss": 0.3662642240524292, + "step": 894 + }, + { + "epoch": 0.44235759298158905, + "grad_norm": 1.3135451040729966, + "learning_rate": 1.8448091668603464e-05, + "loss": 0.29031360149383545, + "step": 895 + }, + { + "epoch": 0.4428518472754232, + "grad_norm": 1.2267380523250877, + "learning_rate": 1.844371702905317e-05, + "loss": 0.36141306161880493, + "step": 896 + }, + { + "epoch": 0.44334610156925736, + "grad_norm": 0.9926258795727512, + "learning_rate": 
1.8439336752598027e-05, + "loss": 0.35286253690719604, + "step": 897 + }, + { + "epoch": 0.44384035586309156, + "grad_norm": 1.0509214985554662, + "learning_rate": 1.8434950842162256e-05, + "loss": 0.38967087864875793, + "step": 898 + }, + { + "epoch": 0.4443346101569257, + "grad_norm": 1.1041873655686079, + "learning_rate": 1.8430559300673824e-05, + "loss": 0.4260423183441162, + "step": 899 + }, + { + "epoch": 0.4448288644507599, + "grad_norm": 1.0004221402171782, + "learning_rate": 1.8426162131064456e-05, + "loss": 0.35336780548095703, + "step": 900 + }, + { + "epoch": 0.44532311874459407, + "grad_norm": 1.0124996907215051, + "learning_rate": 1.842175933626965e-05, + "loss": 0.32953035831451416, + "step": 901 + }, + { + "epoch": 0.4458173730384283, + "grad_norm": 1.1481125848953921, + "learning_rate": 1.841735091922864e-05, + "loss": 0.3495085537433624, + "step": 902 + }, + { + "epoch": 0.44631162733226243, + "grad_norm": 1.0556558347257945, + "learning_rate": 1.8412936882884426e-05, + "loss": 0.3774382174015045, + "step": 903 + }, + { + "epoch": 0.44680588162609663, + "grad_norm": 1.1488659780400408, + "learning_rate": 1.8408517230183756e-05, + "loss": 0.397183358669281, + "step": 904 + }, + { + "epoch": 0.4473001359199308, + "grad_norm": 1.1226988100601583, + "learning_rate": 1.840409196407713e-05, + "loss": 0.4004632234573364, + "step": 905 + }, + { + "epoch": 0.447794390213765, + "grad_norm": 0.9888048683742604, + "learning_rate": 1.8399661087518784e-05, + "loss": 0.3464478850364685, + "step": 906 + }, + { + "epoch": 0.44828864450759914, + "grad_norm": 1.0618254470638813, + "learning_rate": 1.839522460346671e-05, + "loss": 0.38161879777908325, + "step": 907 + }, + { + "epoch": 0.44878289880143335, + "grad_norm": 1.0021571541379897, + "learning_rate": 1.839078251488265e-05, + "loss": 0.3307412266731262, + "step": 908 + }, + { + "epoch": 0.4492771530952675, + "grad_norm": 1.0558486391083746, + "learning_rate": 1.838633482473207e-05, + "loss": 
0.3238945007324219, + "step": 909 + }, + { + "epoch": 0.4497714073891017, + "grad_norm": 1.1763396472681338, + "learning_rate": 1.8381881535984186e-05, + "loss": 0.37863802909851074, + "step": 910 + }, + { + "epoch": 0.45026566168293586, + "grad_norm": 1.187536001798055, + "learning_rate": 1.8377422651611955e-05, + "loss": 0.35920199751853943, + "step": 911 + }, + { + "epoch": 0.45075991597677006, + "grad_norm": 1.1108046485108733, + "learning_rate": 1.8372958174592054e-05, + "loss": 0.3913283050060272, + "step": 912 + }, + { + "epoch": 0.4512541702706042, + "grad_norm": 1.029447767687351, + "learning_rate": 1.8368488107904916e-05, + "loss": 0.32950836420059204, + "step": 913 + }, + { + "epoch": 0.4517484245644384, + "grad_norm": 0.9275296283957708, + "learning_rate": 1.8364012454534687e-05, + "loss": 0.30557066202163696, + "step": 914 + }, + { + "epoch": 0.4522426788582726, + "grad_norm": 1.0685283966213752, + "learning_rate": 1.835953121746925e-05, + "loss": 0.3280435800552368, + "step": 915 + }, + { + "epoch": 0.4527369331521068, + "grad_norm": 1.0053118292301932, + "learning_rate": 1.835504439970021e-05, + "loss": 0.323611319065094, + "step": 916 + }, + { + "epoch": 0.45323118744594093, + "grad_norm": 1.086332749113099, + "learning_rate": 1.835055200422292e-05, + "loss": 0.3794775605201721, + "step": 917 + }, + { + "epoch": 0.45372544173977514, + "grad_norm": 1.1746257984153148, + "learning_rate": 1.8346054034036418e-05, + "loss": 0.3437816798686981, + "step": 918 + }, + { + "epoch": 0.4542196960336093, + "grad_norm": 1.175593282348777, + "learning_rate": 1.8341550492143497e-05, + "loss": 0.40312957763671875, + "step": 919 + }, + { + "epoch": 0.4547139503274435, + "grad_norm": 1.0344840643948632, + "learning_rate": 1.833704138155065e-05, + "loss": 0.33988016843795776, + "step": 920 + }, + { + "epoch": 0.45520820462127765, + "grad_norm": 1.099362227926189, + "learning_rate": 1.83325267052681e-05, + "loss": 0.30893969535827637, + "step": 921 + }, + { + "epoch": 
0.45570245891511185, + "grad_norm": 1.1279932203915406, + "learning_rate": 1.832800646630978e-05, + "loss": 0.3351095914840698, + "step": 922 + }, + { + "epoch": 0.456196713208946, + "grad_norm": 1.0211776718159757, + "learning_rate": 1.8323480667693335e-05, + "loss": 0.3235122561454773, + "step": 923 + }, + { + "epoch": 0.45669096750278015, + "grad_norm": 1.0274671423740642, + "learning_rate": 1.8318949312440126e-05, + "loss": 0.3482256531715393, + "step": 924 + }, + { + "epoch": 0.45718522179661436, + "grad_norm": 1.0223238909560575, + "learning_rate": 1.831441240357522e-05, + "loss": 0.3577580451965332, + "step": 925 + }, + { + "epoch": 0.4576794760904485, + "grad_norm": 1.100617534966992, + "learning_rate": 1.8309869944127386e-05, + "loss": 0.34081172943115234, + "step": 926 + }, + { + "epoch": 0.4581737303842827, + "grad_norm": 1.1911908757683491, + "learning_rate": 1.8305321937129118e-05, + "loss": 0.4041389524936676, + "step": 927 + }, + { + "epoch": 0.45866798467811687, + "grad_norm": 0.9300326755373893, + "learning_rate": 1.830076838561659e-05, + "loss": 0.3014240562915802, + "step": 928 + }, + { + "epoch": 0.4591622389719511, + "grad_norm": 1.0061666296037273, + "learning_rate": 1.829620929262969e-05, + "loss": 0.3105698823928833, + "step": 929 + }, + { + "epoch": 0.4596564932657852, + "grad_norm": 1.035696211609358, + "learning_rate": 1.8291644661212008e-05, + "loss": 0.36114832758903503, + "step": 930 + }, + { + "epoch": 0.46015074755961943, + "grad_norm": 1.0621844186259055, + "learning_rate": 1.828707449441082e-05, + "loss": 0.33738240599632263, + "step": 931 + }, + { + "epoch": 0.4606450018534536, + "grad_norm": 1.0507412286541111, + "learning_rate": 1.8282498795277108e-05, + "loss": 0.3455100655555725, + "step": 932 + }, + { + "epoch": 0.4611392561472878, + "grad_norm": 1.0635377650103532, + "learning_rate": 1.8277917566865544e-05, + "loss": 0.3622395992279053, + "step": 933 + }, + { + "epoch": 0.46163351044112194, + "grad_norm": 1.1698746861585616, 
+ "learning_rate": 1.8273330812234488e-05, + "loss": 0.36942192912101746, + "step": 934 + }, + { + "epoch": 0.46212776473495615, + "grad_norm": 1.1083328377879573, + "learning_rate": 1.8268738534445996e-05, + "loss": 0.33603039383888245, + "step": 935 + }, + { + "epoch": 0.4626220190287903, + "grad_norm": 1.0473328437100615, + "learning_rate": 1.82641407365658e-05, + "loss": 0.34806567430496216, + "step": 936 + }, + { + "epoch": 0.4631162733226245, + "grad_norm": 1.0559884618945852, + "learning_rate": 1.8259537421663333e-05, + "loss": 0.35512328147888184, + "step": 937 + }, + { + "epoch": 0.46361052761645866, + "grad_norm": 1.0108795008514326, + "learning_rate": 1.8254928592811695e-05, + "loss": 0.33349719643592834, + "step": 938 + }, + { + "epoch": 0.46410478191029286, + "grad_norm": 1.2122442261111321, + "learning_rate": 1.8250314253087677e-05, + "loss": 0.3510274887084961, + "step": 939 + }, + { + "epoch": 0.464599036204127, + "grad_norm": 1.2184941603930532, + "learning_rate": 1.824569440557175e-05, + "loss": 0.35831883549690247, + "step": 940 + }, + { + "epoch": 0.4650932904979612, + "grad_norm": 1.1635496425287044, + "learning_rate": 1.824106905334805e-05, + "loss": 0.353208065032959, + "step": 941 + }, + { + "epoch": 0.46558754479179537, + "grad_norm": 1.1400926219916139, + "learning_rate": 1.8236438199504402e-05, + "loss": 0.3335849642753601, + "step": 942 + }, + { + "epoch": 0.4660817990856296, + "grad_norm": 1.0623049779098108, + "learning_rate": 1.8231801847132294e-05, + "loss": 0.346247136592865, + "step": 943 + }, + { + "epoch": 0.46657605337946373, + "grad_norm": 1.0719060242361118, + "learning_rate": 1.8227159999326895e-05, + "loss": 0.35125380754470825, + "step": 944 + }, + { + "epoch": 0.46707030767329794, + "grad_norm": 1.026675887024196, + "learning_rate": 1.822251265918703e-05, + "loss": 0.34262675046920776, + "step": 945 + }, + { + "epoch": 0.4675645619671321, + "grad_norm": 1.0951735908349534, + "learning_rate": 1.82178598298152e-05, + "loss": 
0.3437168598175049, + "step": 946 + }, + { + "epoch": 0.4680588162609663, + "grad_norm": 1.2204880290084008, + "learning_rate": 1.8213201514317565e-05, + "loss": 0.35729774832725525, + "step": 947 + }, + { + "epoch": 0.46855307055480044, + "grad_norm": 1.1062871199303559, + "learning_rate": 1.8208537715803954e-05, + "loss": 0.36507898569107056, + "step": 948 + }, + { + "epoch": 0.46904732484863465, + "grad_norm": 1.0875432400928187, + "learning_rate": 1.8203868437387847e-05, + "loss": 0.363017737865448, + "step": 949 + }, + { + "epoch": 0.4695415791424688, + "grad_norm": 1.0718622311605446, + "learning_rate": 1.8199193682186388e-05, + "loss": 0.3645821511745453, + "step": 950 + }, + { + "epoch": 0.47003583343630295, + "grad_norm": 1.2195854283374437, + "learning_rate": 1.8194513453320387e-05, + "loss": 0.3054324686527252, + "step": 951 + }, + { + "epoch": 0.47053008773013716, + "grad_norm": 1.0538248118306075, + "learning_rate": 1.8189827753914282e-05, + "loss": 0.35003694891929626, + "step": 952 + }, + { + "epoch": 0.4710243420239713, + "grad_norm": 1.1789267282791076, + "learning_rate": 1.8185136587096193e-05, + "loss": 0.37834814190864563, + "step": 953 + }, + { + "epoch": 0.4715185963178055, + "grad_norm": 1.0741971770420784, + "learning_rate": 1.8180439955997867e-05, + "loss": 0.3369285464286804, + "step": 954 + }, + { + "epoch": 0.47201285061163967, + "grad_norm": 1.010532535770725, + "learning_rate": 1.8175737863754706e-05, + "loss": 0.3612895905971527, + "step": 955 + }, + { + "epoch": 0.4725071049054739, + "grad_norm": 1.057430538694607, + "learning_rate": 1.817103031350577e-05, + "loss": 0.34393271803855896, + "step": 956 + }, + { + "epoch": 0.473001359199308, + "grad_norm": 1.0983705860238564, + "learning_rate": 1.8166317308393745e-05, + "loss": 0.3824620544910431, + "step": 957 + }, + { + "epoch": 0.47349561349314223, + "grad_norm": 1.0093831974265368, + "learning_rate": 1.816159885156497e-05, + "loss": 0.3092145621776581, + "step": 958 + }, + { + 
"epoch": 0.4739898677869764, + "grad_norm": 0.9971938324913802, + "learning_rate": 1.8156874946169414e-05, + "loss": 0.3328183889389038, + "step": 959 + }, + { + "epoch": 0.4744841220808106, + "grad_norm": 1.1071894513842127, + "learning_rate": 1.815214559536069e-05, + "loss": 0.3715244233608246, + "step": 960 + }, + { + "epoch": 0.47497837637464474, + "grad_norm": 0.9615506144211561, + "learning_rate": 1.814741080229605e-05, + "loss": 0.31065690517425537, + "step": 961 + }, + { + "epoch": 0.47547263066847895, + "grad_norm": 1.0443475280559777, + "learning_rate": 1.814267057013637e-05, + "loss": 0.3632475733757019, + "step": 962 + }, + { + "epoch": 0.4759668849623131, + "grad_norm": 1.0447314581931118, + "learning_rate": 1.813792490204616e-05, + "loss": 0.3367992043495178, + "step": 963 + }, + { + "epoch": 0.4764611392561473, + "grad_norm": 3.0902704784337263, + "learning_rate": 1.813317380119356e-05, + "loss": 0.37678295373916626, + "step": 964 + }, + { + "epoch": 0.47695539354998145, + "grad_norm": 1.092515860835368, + "learning_rate": 1.8128417270750342e-05, + "loss": 0.31454166769981384, + "step": 965 + }, + { + "epoch": 0.47744964784381566, + "grad_norm": 1.1351912635055343, + "learning_rate": 1.81236553138919e-05, + "loss": 0.38495004177093506, + "step": 966 + }, + { + "epoch": 0.4779439021376498, + "grad_norm": 1.1935841314497264, + "learning_rate": 1.8118887933797237e-05, + "loss": 0.3867315948009491, + "step": 967 + }, + { + "epoch": 0.478438156431484, + "grad_norm": 1.0520609240642282, + "learning_rate": 1.8114115133648996e-05, + "loss": 0.3453156650066376, + "step": 968 + }, + { + "epoch": 0.47893241072531817, + "grad_norm": 1.0244115852831113, + "learning_rate": 1.8109336916633426e-05, + "loss": 0.34461456537246704, + "step": 969 + }, + { + "epoch": 0.4794266650191524, + "grad_norm": 1.0814329785787762, + "learning_rate": 1.8104553285940404e-05, + "loss": 0.36489856243133545, + "step": 970 + }, + { + "epoch": 0.4799209193129865, + "grad_norm": 
1.0551232871498393, + "learning_rate": 1.80997642447634e-05, + "loss": 0.3596840500831604, + "step": 971 + }, + { + "epoch": 0.48041517360682073, + "grad_norm": 1.1473167291229827, + "learning_rate": 1.8094969796299527e-05, + "loss": 0.3856956362724304, + "step": 972 + }, + { + "epoch": 0.4809094279006549, + "grad_norm": 1.036679746340059, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3235170245170593, + "step": 973 + }, + { + "epoch": 0.4814036821944891, + "grad_norm": 0.9980037023378185, + "learning_rate": 1.8085364690317564e-05, + "loss": 0.28033584356307983, + "step": 974 + }, + { + "epoch": 0.48189793648832324, + "grad_norm": 1.0350899218465197, + "learning_rate": 1.808055403921171e-05, + "loss": 0.3279935419559479, + "step": 975 + }, + { + "epoch": 0.4823921907821574, + "grad_norm": 1.1400322966724836, + "learning_rate": 1.8075737993643442e-05, + "loss": 0.36426058411598206, + "step": 976 + }, + { + "epoch": 0.4828864450759916, + "grad_norm": 1.062964412180167, + "learning_rate": 1.8070916556827876e-05, + "loss": 0.3720256984233856, + "step": 977 + }, + { + "epoch": 0.48338069936982575, + "grad_norm": 1.1101144076762623, + "learning_rate": 1.8066089731983735e-05, + "loss": 0.3299727439880371, + "step": 978 + }, + { + "epoch": 0.48387495366365996, + "grad_norm": 1.1080862284860111, + "learning_rate": 1.8061257522333338e-05, + "loss": 0.3425888419151306, + "step": 979 + }, + { + "epoch": 0.4843692079574941, + "grad_norm": 1.1899160965861721, + "learning_rate": 1.80564199311026e-05, + "loss": 0.34109392762184143, + "step": 980 + }, + { + "epoch": 0.4848634622513283, + "grad_norm": 1.017538963669655, + "learning_rate": 1.805157696152103e-05, + "loss": 0.29130926728248596, + "step": 981 + }, + { + "epoch": 0.48535771654516247, + "grad_norm": 1.1092378859222098, + "learning_rate": 1.8046728616821726e-05, + "loss": 0.36200815439224243, + "step": 982 + }, + { + "epoch": 0.48585197083899667, + "grad_norm": 1.3150178990962822, + "learning_rate": 
1.8041874900241368e-05, + "loss": 0.3343828320503235, + "step": 983 + }, + { + "epoch": 0.4863462251328308, + "grad_norm": 0.9882024578614582, + "learning_rate": 1.803701581502023e-05, + "loss": 0.32942160964012146, + "step": 984 + }, + { + "epoch": 0.48684047942666503, + "grad_norm": 0.9909863431121513, + "learning_rate": 1.803215136440217e-05, + "loss": 0.34390491247177124, + "step": 985 + }, + { + "epoch": 0.4873347337204992, + "grad_norm": 1.1118778887065912, + "learning_rate": 1.8027281551634622e-05, + "loss": 0.37723374366760254, + "step": 986 + }, + { + "epoch": 0.4878289880143334, + "grad_norm": 1.0469525424396737, + "learning_rate": 1.802240637996861e-05, + "loss": 0.3493693470954895, + "step": 987 + }, + { + "epoch": 0.48832324230816754, + "grad_norm": 1.131021341780466, + "learning_rate": 1.8017525852658723e-05, + "loss": 0.3564317524433136, + "step": 988 + }, + { + "epoch": 0.48881749660200174, + "grad_norm": 1.0435634175515676, + "learning_rate": 1.8012639972963136e-05, + "loss": 0.36572349071502686, + "step": 989 + }, + { + "epoch": 0.4893117508958359, + "grad_norm": 1.0078714155476896, + "learning_rate": 1.8007748744143586e-05, + "loss": 0.31457674503326416, + "step": 990 + }, + { + "epoch": 0.4898060051896701, + "grad_norm": 1.1126722971991523, + "learning_rate": 1.8002852169465393e-05, + "loss": 0.36191096901893616, + "step": 991 + }, + { + "epoch": 0.49030025948350425, + "grad_norm": 1.0321930748215848, + "learning_rate": 1.799795025219744e-05, + "loss": 0.33284491300582886, + "step": 992 + }, + { + "epoch": 0.49079451377733846, + "grad_norm": 1.0239281284644144, + "learning_rate": 1.7993042995612172e-05, + "loss": 0.3101437985897064, + "step": 993 + }, + { + "epoch": 0.4912887680711726, + "grad_norm": 1.033291904553078, + "learning_rate": 1.7988130402985608e-05, + "loss": 0.3196948170661926, + "step": 994 + }, + { + "epoch": 0.4917830223650068, + "grad_norm": 1.1489266069218314, + "learning_rate": 1.7983212477597325e-05, + "loss": 
0.3757585883140564, + "step": 995 + }, + { + "epoch": 0.49227727665884097, + "grad_norm": 1.1725728838471274, + "learning_rate": 1.7978289222730454e-05, + "loss": 0.3949659466743469, + "step": 996 + }, + { + "epoch": 0.4927715309526752, + "grad_norm": 1.1279800728609437, + "learning_rate": 1.79733606416717e-05, + "loss": 0.3490184545516968, + "step": 997 + }, + { + "epoch": 0.4932657852465093, + "grad_norm": 1.2158784468170585, + "learning_rate": 1.7968426737711304e-05, + "loss": 0.32302743196487427, + "step": 998 + }, + { + "epoch": 0.49376003954034353, + "grad_norm": 1.1923748239050125, + "learning_rate": 1.7963487514143073e-05, + "loss": 0.4205089807510376, + "step": 999 + }, + { + "epoch": 0.4942542938341777, + "grad_norm": 1.0997609009048648, + "learning_rate": 1.7958542974264363e-05, + "loss": 0.30787885189056396, + "step": 1000 + }, + { + "epoch": 0.4947485481280119, + "grad_norm": 0.9527130505595168, + "learning_rate": 1.7953593121376075e-05, + "loss": 0.3174916207790375, + "step": 1001 + }, + { + "epoch": 0.49524280242184604, + "grad_norm": 0.9736659707101099, + "learning_rate": 1.7948637958782662e-05, + "loss": 0.330039381980896, + "step": 1002 + }, + { + "epoch": 0.4957370567156802, + "grad_norm": 1.0487288206783625, + "learning_rate": 1.794367748979212e-05, + "loss": 0.3362613320350647, + "step": 1003 + }, + { + "epoch": 0.4962313110095144, + "grad_norm": 1.065682818958373, + "learning_rate": 1.793871171771599e-05, + "loss": 0.3479865789413452, + "step": 1004 + }, + { + "epoch": 0.49672556530334855, + "grad_norm": 1.0920057715386207, + "learning_rate": 1.7933740645869345e-05, + "loss": 0.361303448677063, + "step": 1005 + }, + { + "epoch": 0.49721981959718276, + "grad_norm": 1.07605927747069, + "learning_rate": 1.79287642775708e-05, + "loss": 0.32340794801712036, + "step": 1006 + }, + { + "epoch": 0.4977140738910169, + "grad_norm": 1.086462795838887, + "learning_rate": 1.792378261614252e-05, + "loss": 0.3410148620605469, + "step": 1007 + }, + { + 
"epoch": 0.4982083281848511, + "grad_norm": 1.0450045575623719, + "learning_rate": 1.791879566491018e-05, + "loss": 0.3332127034664154, + "step": 1008 + }, + { + "epoch": 0.49870258247868526, + "grad_norm": 1.1673390171795246, + "learning_rate": 1.7913803427202998e-05, + "loss": 0.36532774567604065, + "step": 1009 + }, + { + "epoch": 0.49919683677251947, + "grad_norm": 1.1838892890378474, + "learning_rate": 1.7908805906353725e-05, + "loss": 0.3721959888935089, + "step": 1010 + }, + { + "epoch": 0.4996910910663536, + "grad_norm": 0.990806411218012, + "learning_rate": 1.7903803105698627e-05, + "loss": 0.3406672477722168, + "step": 1011 + }, + { + "epoch": 0.5001853453601878, + "grad_norm": 1.0152890264941994, + "learning_rate": 1.789879502857751e-05, + "loss": 0.323926717042923, + "step": 1012 + }, + { + "epoch": 0.500679599654022, + "grad_norm": 1.082078334287421, + "learning_rate": 1.7893781678333694e-05, + "loss": 0.36245018243789673, + "step": 1013 + }, + { + "epoch": 0.5011738539478562, + "grad_norm": 1.1363612319173766, + "learning_rate": 1.7888763058314016e-05, + "loss": 0.36145877838134766, + "step": 1014 + }, + { + "epoch": 0.5016681082416904, + "grad_norm": 0.9479821815236287, + "learning_rate": 1.788373917186884e-05, + "loss": 0.31398001313209534, + "step": 1015 + }, + { + "epoch": 0.5021623625355245, + "grad_norm": 1.0634976007398544, + "learning_rate": 1.7878710022352033e-05, + "loss": 0.36732447147369385, + "step": 1016 + }, + { + "epoch": 0.5026566168293587, + "grad_norm": 1.0888289854290114, + "learning_rate": 1.787367561312099e-05, + "loss": 0.3336929678916931, + "step": 1017 + }, + { + "epoch": 0.5031508711231929, + "grad_norm": 1.081948070644993, + "learning_rate": 1.786863594753661e-05, + "loss": 0.33306068181991577, + "step": 1018 + }, + { + "epoch": 0.5036451254170271, + "grad_norm": 1.1710814753085148, + "learning_rate": 1.7863591028963297e-05, + "loss": 0.32577213644981384, + "step": 1019 + }, + { + "epoch": 0.5041393797108612, + "grad_norm": 
1.0902819718302648, + "learning_rate": 1.7858540860768974e-05, + "loss": 0.33542972803115845, + "step": 1020 + }, + { + "epoch": 0.5046336340046954, + "grad_norm": 1.1116685663765398, + "learning_rate": 1.7853485446325055e-05, + "loss": 0.3075249195098877, + "step": 1021 + }, + { + "epoch": 0.5051278882985296, + "grad_norm": 1.135601263046101, + "learning_rate": 1.7848424789006466e-05, + "loss": 0.3473510146141052, + "step": 1022 + }, + { + "epoch": 0.5056221425923638, + "grad_norm": 1.2152682076096186, + "learning_rate": 1.784335889219163e-05, + "loss": 0.3543929159641266, + "step": 1023 + }, + { + "epoch": 0.5061163968861979, + "grad_norm": 1.026549045591816, + "learning_rate": 1.783828775926246e-05, + "loss": 0.3198593556880951, + "step": 1024 + }, + { + "epoch": 0.5066106511800321, + "grad_norm": 1.07796975394457, + "learning_rate": 1.783321139360438e-05, + "loss": 0.34223973751068115, + "step": 1025 + }, + { + "epoch": 0.5071049054738663, + "grad_norm": 1.2487195797385122, + "learning_rate": 1.78281297986063e-05, + "loss": 0.3895387351512909, + "step": 1026 + }, + { + "epoch": 0.5075991597677005, + "grad_norm": 1.0333211037977794, + "learning_rate": 1.782304297766061e-05, + "loss": 0.35764580965042114, + "step": 1027 + }, + { + "epoch": 0.5080934140615346, + "grad_norm": 0.9679048017438919, + "learning_rate": 1.7817950934163213e-05, + "loss": 0.30859488248825073, + "step": 1028 + }, + { + "epoch": 0.5085876683553688, + "grad_norm": 1.0913185130679384, + "learning_rate": 1.7812853671513472e-05, + "loss": 0.3554389476776123, + "step": 1029 + }, + { + "epoch": 0.509081922649203, + "grad_norm": 1.0101463789736986, + "learning_rate": 1.7807751193114254e-05, + "loss": 0.3528766632080078, + "step": 1030 + }, + { + "epoch": 0.5095761769430371, + "grad_norm": 1.054067237260528, + "learning_rate": 1.78026435023719e-05, + "loss": 0.3645275831222534, + "step": 1031 + }, + { + "epoch": 0.5100704312368713, + "grad_norm": 1.338540047449502, + "learning_rate": 
1.779753060269623e-05, + "loss": 0.3137075901031494, + "step": 1032 + }, + { + "epoch": 0.5105646855307056, + "grad_norm": 1.0928434325752037, + "learning_rate": 1.7792412497500538e-05, + "loss": 0.31993091106414795, + "step": 1033 + }, + { + "epoch": 0.5110589398245398, + "grad_norm": 1.032718640643118, + "learning_rate": 1.7787289190201606e-05, + "loss": 0.3514295220375061, + "step": 1034 + }, + { + "epoch": 0.5115531941183739, + "grad_norm": 0.9529992201270954, + "learning_rate": 1.7782160684219677e-05, + "loss": 0.3167670667171478, + "step": 1035 + }, + { + "epoch": 0.5120474484122081, + "grad_norm": 1.1056391999630892, + "learning_rate": 1.7777026982978473e-05, + "loss": 0.3298097252845764, + "step": 1036 + }, + { + "epoch": 0.5125417027060423, + "grad_norm": 1.008539858185866, + "learning_rate": 1.777188808990517e-05, + "loss": 0.3334948420524597, + "step": 1037 + }, + { + "epoch": 0.5130359569998765, + "grad_norm": 1.1451382861648118, + "learning_rate": 1.776674400843043e-05, + "loss": 0.3705115020275116, + "step": 1038 + }, + { + "epoch": 0.5135302112937106, + "grad_norm": 1.2062150323771585, + "learning_rate": 1.7761594741988356e-05, + "loss": 0.3586978614330292, + "step": 1039 + }, + { + "epoch": 0.5140244655875448, + "grad_norm": 0.9949081741462515, + "learning_rate": 1.7756440294016535e-05, + "loss": 0.3105466663837433, + "step": 1040 + }, + { + "epoch": 0.514518719881379, + "grad_norm": 1.240576049327348, + "learning_rate": 1.7751280667956002e-05, + "loss": 0.35213470458984375, + "step": 1041 + }, + { + "epoch": 0.5150129741752132, + "grad_norm": 1.1494264660428748, + "learning_rate": 1.7746115867251245e-05, + "loss": 0.3830525875091553, + "step": 1042 + }, + { + "epoch": 0.5155072284690473, + "grad_norm": 1.044917786849415, + "learning_rate": 1.7740945895350215e-05, + "loss": 0.34106165170669556, + "step": 1043 + }, + { + "epoch": 0.5160014827628815, + "grad_norm": 0.9456529066854209, + "learning_rate": 1.773577075570431e-05, + "loss": 
0.33408549427986145, + "step": 1044 + }, + { + "epoch": 0.5164957370567157, + "grad_norm": 1.057634132461443, + "learning_rate": 1.7730590451768375e-05, + "loss": 0.32823115587234497, + "step": 1045 + }, + { + "epoch": 0.5169899913505499, + "grad_norm": 0.9870247990943719, + "learning_rate": 1.7725404987000716e-05, + "loss": 0.2866591811180115, + "step": 1046 + }, + { + "epoch": 0.517484245644384, + "grad_norm": 1.0669638645996897, + "learning_rate": 1.772021436486307e-05, + "loss": 0.34053099155426025, + "step": 1047 + }, + { + "epoch": 0.5179784999382182, + "grad_norm": 1.0384310943814752, + "learning_rate": 1.771501858882062e-05, + "loss": 0.30379486083984375, + "step": 1048 + }, + { + "epoch": 0.5184727542320524, + "grad_norm": 1.299899967945095, + "learning_rate": 1.7709817662341998e-05, + "loss": 0.37569302320480347, + "step": 1049 + }, + { + "epoch": 0.5189670085258866, + "grad_norm": 1.0489606422309163, + "learning_rate": 1.770461158889926e-05, + "loss": 0.31770390272140503, + "step": 1050 + }, + { + "epoch": 0.5194612628197207, + "grad_norm": 1.1640089464310481, + "learning_rate": 1.769940037196791e-05, + "loss": 0.34175002574920654, + "step": 1051 + }, + { + "epoch": 0.5199555171135549, + "grad_norm": 1.0797819699416114, + "learning_rate": 1.769418401502689e-05, + "loss": 0.3634580671787262, + "step": 1052 + }, + { + "epoch": 0.5204497714073891, + "grad_norm": 1.1990448584577926, + "learning_rate": 1.7688962521558554e-05, + "loss": 0.3631044030189514, + "step": 1053 + }, + { + "epoch": 0.5209440257012233, + "grad_norm": 1.2482048374766477, + "learning_rate": 1.7683735895048698e-05, + "loss": 0.3402160704135895, + "step": 1054 + }, + { + "epoch": 0.5214382799950574, + "grad_norm": 1.2190765212037056, + "learning_rate": 1.7678504138986548e-05, + "loss": 0.3895665407180786, + "step": 1055 + }, + { + "epoch": 0.5219325342888916, + "grad_norm": 1.076846194861831, + "learning_rate": 1.767326725686475e-05, + "loss": 0.32207030057907104, + "step": 1056 + }, + { + 
"epoch": 0.5224267885827258, + "grad_norm": 1.10282378456951, + "learning_rate": 1.7668025252179363e-05, + "loss": 0.33095866441726685, + "step": 1057 + }, + { + "epoch": 0.5229210428765599, + "grad_norm": 1.1487800022178571, + "learning_rate": 1.7662778128429883e-05, + "loss": 0.33239442110061646, + "step": 1058 + }, + { + "epoch": 0.5234152971703941, + "grad_norm": 0.9873637767970463, + "learning_rate": 1.7657525889119212e-05, + "loss": 0.27432021498680115, + "step": 1059 + }, + { + "epoch": 0.5239095514642284, + "grad_norm": 1.0928994862368866, + "learning_rate": 1.7652268537753672e-05, + "loss": 0.3221333622932434, + "step": 1060 + }, + { + "epoch": 0.5244038057580626, + "grad_norm": 1.114838100134283, + "learning_rate": 1.764700607784299e-05, + "loss": 0.3126341700553894, + "step": 1061 + }, + { + "epoch": 0.5248980600518967, + "grad_norm": 1.0401864286303986, + "learning_rate": 1.7641738512900315e-05, + "loss": 0.33239883184432983, + "step": 1062 + }, + { + "epoch": 0.5253923143457309, + "grad_norm": 0.9509614150111031, + "learning_rate": 1.7636465846442197e-05, + "loss": 0.30075010657310486, + "step": 1063 + }, + { + "epoch": 0.5258865686395651, + "grad_norm": 1.0717488761603333, + "learning_rate": 1.763118808198859e-05, + "loss": 0.3577713370323181, + "step": 1064 + }, + { + "epoch": 0.5263808229333993, + "grad_norm": 1.0802706273753335, + "learning_rate": 1.7625905223062858e-05, + "loss": 0.3483964204788208, + "step": 1065 + }, + { + "epoch": 0.5268750772272334, + "grad_norm": 1.1651963376515642, + "learning_rate": 1.762061727319176e-05, + "loss": 0.3622454106807709, + "step": 1066 + }, + { + "epoch": 0.5273693315210676, + "grad_norm": 1.0440643033385941, + "learning_rate": 1.761532423590545e-05, + "loss": 0.35156917572021484, + "step": 1067 + }, + { + "epoch": 0.5278635858149018, + "grad_norm": 1.1589394381083906, + "learning_rate": 1.7610026114737498e-05, + "loss": 0.3413820266723633, + "step": 1068 + }, + { + "epoch": 0.528357840108736, + "grad_norm": 
1.1280561588615983, + "learning_rate": 1.760472291322484e-05, + "loss": 0.3707934021949768, + "step": 1069 + }, + { + "epoch": 0.5288520944025701, + "grad_norm": 1.2170503232061094, + "learning_rate": 1.7599414634907828e-05, + "loss": 0.3472951054573059, + "step": 1070 + }, + { + "epoch": 0.5293463486964043, + "grad_norm": 1.1676650140216285, + "learning_rate": 1.7594101283330184e-05, + "loss": 0.393882155418396, + "step": 1071 + }, + { + "epoch": 0.5298406029902385, + "grad_norm": 0.9683606994511744, + "learning_rate": 1.758878286203903e-05, + "loss": 0.3094913065433502, + "step": 1072 + }, + { + "epoch": 0.5303348572840727, + "grad_norm": 1.09347684867524, + "learning_rate": 1.758345937458487e-05, + "loss": 0.33904048800468445, + "step": 1073 + }, + { + "epoch": 0.5308291115779068, + "grad_norm": 1.0218184375103434, + "learning_rate": 1.7578130824521585e-05, + "loss": 0.3218901753425598, + "step": 1074 + }, + { + "epoch": 0.531323365871741, + "grad_norm": 0.95615697696865, + "learning_rate": 1.7572797215406442e-05, + "loss": 0.31584852933883667, + "step": 1075 + }, + { + "epoch": 0.5318176201655752, + "grad_norm": 0.9682503945021611, + "learning_rate": 1.756745855080008e-05, + "loss": 0.3449877202510834, + "step": 1076 + }, + { + "epoch": 0.5323118744594094, + "grad_norm": 1.084607183777355, + "learning_rate": 1.756211483426651e-05, + "loss": 0.3544886112213135, + "step": 1077 + }, + { + "epoch": 0.5328061287532435, + "grad_norm": 1.1680618553038933, + "learning_rate": 1.755676606937313e-05, + "loss": 0.34360697865486145, + "step": 1078 + }, + { + "epoch": 0.5333003830470777, + "grad_norm": 1.0514045755368502, + "learning_rate": 1.7551412259690695e-05, + "loss": 0.3214710056781769, + "step": 1079 + }, + { + "epoch": 0.5337946373409119, + "grad_norm": 0.9951048830690797, + "learning_rate": 1.754605340879333e-05, + "loss": 0.33841896057128906, + "step": 1080 + }, + { + "epoch": 0.534288891634746, + "grad_norm": 1.0536673015942455, + "learning_rate": 
1.7540689520258532e-05, + "loss": 0.3134745657444, + "step": 1081 + }, + { + "epoch": 0.5347831459285802, + "grad_norm": 1.1773503335041235, + "learning_rate": 1.753532059766715e-05, + "loss": 0.3469204306602478, + "step": 1082 + }, + { + "epoch": 0.5352774002224144, + "grad_norm": 1.3802140663046265, + "learning_rate": 1.752994664460341e-05, + "loss": 0.39217621088027954, + "step": 1083 + }, + { + "epoch": 0.5357716545162486, + "grad_norm": 1.148906185686213, + "learning_rate": 1.7524567664654873e-05, + "loss": 0.34482622146606445, + "step": 1084 + }, + { + "epoch": 0.5362659088100827, + "grad_norm": 1.0089175831530743, + "learning_rate": 1.751918366141248e-05, + "loss": 0.308369517326355, + "step": 1085 + }, + { + "epoch": 0.5367601631039169, + "grad_norm": 1.1441511379564429, + "learning_rate": 1.751379463847051e-05, + "loss": 0.3396676480770111, + "step": 1086 + }, + { + "epoch": 0.5372544173977511, + "grad_norm": 1.0963418237920814, + "learning_rate": 1.7508400599426596e-05, + "loss": 0.3059370517730713, + "step": 1087 + }, + { + "epoch": 0.5377486716915854, + "grad_norm": 0.993693807257297, + "learning_rate": 1.7503001547881728e-05, + "loss": 0.31689077615737915, + "step": 1088 + }, + { + "epoch": 0.5382429259854195, + "grad_norm": 1.2996366258679217, + "learning_rate": 1.749759748744023e-05, + "loss": 0.37134337425231934, + "step": 1089 + }, + { + "epoch": 0.5387371802792537, + "grad_norm": 1.0586799377490923, + "learning_rate": 1.7492188421709775e-05, + "loss": 0.30404967069625854, + "step": 1090 + }, + { + "epoch": 0.5392314345730879, + "grad_norm": 1.1213884593031693, + "learning_rate": 1.7486774354301382e-05, + "loss": 0.34773269295692444, + "step": 1091 + }, + { + "epoch": 0.5397256888669221, + "grad_norm": 1.135256212480744, + "learning_rate": 1.7481355288829404e-05, + "loss": 0.34448760747909546, + "step": 1092 + }, + { + "epoch": 0.5402199431607562, + "grad_norm": 1.1111138178806874, + "learning_rate": 1.7475931228911526e-05, + "loss": 
0.33557915687561035, + "step": 1093 + }, + { + "epoch": 0.5407141974545904, + "grad_norm": 1.1277612406863344, + "learning_rate": 1.7470502178168783e-05, + "loss": 0.3216322362422943, + "step": 1094 + }, + { + "epoch": 0.5412084517484246, + "grad_norm": 1.1416777218141756, + "learning_rate": 1.7465068140225524e-05, + "loss": 0.3175346255302429, + "step": 1095 + }, + { + "epoch": 0.5417027060422588, + "grad_norm": 1.0466005920407673, + "learning_rate": 1.7459629118709435e-05, + "loss": 0.3150678277015686, + "step": 1096 + }, + { + "epoch": 0.5421969603360929, + "grad_norm": 1.1080261557130098, + "learning_rate": 1.7454185117251534e-05, + "loss": 0.3372325897216797, + "step": 1097 + }, + { + "epoch": 0.5426912146299271, + "grad_norm": 1.1607395393986693, + "learning_rate": 1.7448736139486156e-05, + "loss": 0.3460095524787903, + "step": 1098 + }, + { + "epoch": 0.5431854689237613, + "grad_norm": 1.0960477562857334, + "learning_rate": 1.7443282189050964e-05, + "loss": 0.3465900421142578, + "step": 1099 + }, + { + "epoch": 0.5436797232175955, + "grad_norm": 1.1271957826518202, + "learning_rate": 1.7437823269586925e-05, + "loss": 0.3707941174507141, + "step": 1100 + }, + { + "epoch": 0.5441739775114296, + "grad_norm": 1.0732325510644303, + "learning_rate": 1.7432359384738354e-05, + "loss": 0.3317713141441345, + "step": 1101 + }, + { + "epoch": 0.5446682318052638, + "grad_norm": 1.10075448775578, + "learning_rate": 1.742689053815285e-05, + "loss": 0.3391956090927124, + "step": 1102 + }, + { + "epoch": 0.545162486099098, + "grad_norm": 1.483156522178114, + "learning_rate": 1.742141673348134e-05, + "loss": 0.3838513195514679, + "step": 1103 + }, + { + "epoch": 0.5456567403929322, + "grad_norm": 1.2368776155357775, + "learning_rate": 1.7415937974378057e-05, + "loss": 0.4438849687576294, + "step": 1104 + }, + { + "epoch": 0.5461509946867663, + "grad_norm": 1.1360365035496875, + "learning_rate": 1.7410454264500542e-05, + "loss": 0.35329896211624146, + "step": 1105 + }, + { + 
"epoch": 0.5466452489806005, + "grad_norm": 0.9946710480219276, + "learning_rate": 1.7404965607509646e-05, + "loss": 0.3124481439590454, + "step": 1106 + }, + { + "epoch": 0.5471395032744347, + "grad_norm": 1.1827285369169889, + "learning_rate": 1.739947200706951e-05, + "loss": 0.3595995008945465, + "step": 1107 + }, + { + "epoch": 0.5476337575682688, + "grad_norm": 1.0771205850736374, + "learning_rate": 1.7393973466847592e-05, + "loss": 0.35914891958236694, + "step": 1108 + }, + { + "epoch": 0.548128011862103, + "grad_norm": 1.0372075645038734, + "learning_rate": 1.7388469990514636e-05, + "loss": 0.34034737944602966, + "step": 1109 + }, + { + "epoch": 0.5486222661559372, + "grad_norm": 0.9639792162761298, + "learning_rate": 1.7382961581744677e-05, + "loss": 0.3033643066883087, + "step": 1110 + }, + { + "epoch": 0.5491165204497714, + "grad_norm": 1.0333536833038373, + "learning_rate": 1.737744824421506e-05, + "loss": 0.3239862322807312, + "step": 1111 + }, + { + "epoch": 0.5496107747436055, + "grad_norm": 1.0992782883377998, + "learning_rate": 1.7371929981606403e-05, + "loss": 0.36473411321640015, + "step": 1112 + }, + { + "epoch": 0.5501050290374397, + "grad_norm": 0.9808971248907185, + "learning_rate": 1.7366406797602625e-05, + "loss": 0.3129761517047882, + "step": 1113 + }, + { + "epoch": 0.550599283331274, + "grad_norm": 1.0031500416462213, + "learning_rate": 1.736087869589092e-05, + "loss": 0.30224812030792236, + "step": 1114 + }, + { + "epoch": 0.5510935376251082, + "grad_norm": 1.0008522519559948, + "learning_rate": 1.7355345680161774e-05, + "loss": 0.30045247077941895, + "step": 1115 + }, + { + "epoch": 0.5515877919189422, + "grad_norm": 1.1079372723945795, + "learning_rate": 1.7349807754108944e-05, + "loss": 0.3356926739215851, + "step": 1116 + }, + { + "epoch": 0.5520820462127765, + "grad_norm": 1.3704982317685879, + "learning_rate": 1.7344264921429475e-05, + "loss": 0.37749868631362915, + "step": 1117 + }, + { + "epoch": 0.5525763005066107, + 
"grad_norm": 1.0400914273370205, + "learning_rate": 1.733871718582368e-05, + "loss": 0.331012099981308, + "step": 1118 + }, + { + "epoch": 0.5530705548004449, + "grad_norm": 1.2654046748606915, + "learning_rate": 1.7333164550995153e-05, + "loss": 0.3557187020778656, + "step": 1119 + }, + { + "epoch": 0.553564809094279, + "grad_norm": 1.151377810019934, + "learning_rate": 1.7327607020650744e-05, + "loss": 0.34102991223335266, + "step": 1120 + }, + { + "epoch": 0.5540590633881132, + "grad_norm": 1.0397881413898085, + "learning_rate": 1.7322044598500594e-05, + "loss": 0.328019917011261, + "step": 1121 + }, + { + "epoch": 0.5545533176819474, + "grad_norm": 1.0773058589187376, + "learning_rate": 1.7316477288258085e-05, + "loss": 0.33980751037597656, + "step": 1122 + }, + { + "epoch": 0.5550475719757816, + "grad_norm": 1.1823119583137516, + "learning_rate": 1.731090509363988e-05, + "loss": 0.3460109233856201, + "step": 1123 + }, + { + "epoch": 0.5555418262696157, + "grad_norm": 1.0727245460190564, + "learning_rate": 1.730532801836589e-05, + "loss": 0.3013002276420593, + "step": 1124 + }, + { + "epoch": 0.5560360805634499, + "grad_norm": 1.191952525403325, + "learning_rate": 1.72997460661593e-05, + "loss": 0.36195772886276245, + "step": 1125 + }, + { + "epoch": 0.5565303348572841, + "grad_norm": 1.1481571926267522, + "learning_rate": 1.7294159240746532e-05, + "loss": 0.3368675112724304, + "step": 1126 + }, + { + "epoch": 0.5570245891511183, + "grad_norm": 1.0950064938478345, + "learning_rate": 1.7288567545857283e-05, + "loss": 0.36618539690971375, + "step": 1127 + }, + { + "epoch": 0.5575188434449524, + "grad_norm": 1.0773610015009678, + "learning_rate": 1.7282970985224477e-05, + "loss": 0.3230215311050415, + "step": 1128 + }, + { + "epoch": 0.5580130977387866, + "grad_norm": 1.1539889538468413, + "learning_rate": 1.72773695625843e-05, + "loss": 0.38779711723327637, + "step": 1129 + }, + { + "epoch": 0.5585073520326208, + "grad_norm": 1.0853438524765577, + 
"learning_rate": 1.7271763281676187e-05, + "loss": 0.33910998702049255, + "step": 1130 + }, + { + "epoch": 0.559001606326455, + "grad_norm": 1.1265909455665821, + "learning_rate": 1.726615214624281e-05, + "loss": 0.3526651859283447, + "step": 1131 + }, + { + "epoch": 0.5594958606202891, + "grad_norm": 1.0899084132349224, + "learning_rate": 1.7260536160030077e-05, + "loss": 0.33794116973876953, + "step": 1132 + }, + { + "epoch": 0.5599901149141233, + "grad_norm": 1.2383181058563666, + "learning_rate": 1.7254915326787145e-05, + "loss": 0.3294123411178589, + "step": 1133 + }, + { + "epoch": 0.5604843692079575, + "grad_norm": 1.0381296685245769, + "learning_rate": 1.7249289650266402e-05, + "loss": 0.31193166971206665, + "step": 1134 + }, + { + "epoch": 0.5609786235017916, + "grad_norm": 1.0273514183990056, + "learning_rate": 1.7243659134223467e-05, + "loss": 0.298290491104126, + "step": 1135 + }, + { + "epoch": 0.5614728777956258, + "grad_norm": 1.0372406743131939, + "learning_rate": 1.7238023782417194e-05, + "loss": 0.3157176971435547, + "step": 1136 + }, + { + "epoch": 0.56196713208946, + "grad_norm": 0.9703670449018593, + "learning_rate": 1.7232383598609664e-05, + "loss": 0.3152535855770111, + "step": 1137 + }, + { + "epoch": 0.5624613863832942, + "grad_norm": 1.1457741905911056, + "learning_rate": 1.722673858656618e-05, + "loss": 0.35004952549934387, + "step": 1138 + }, + { + "epoch": 0.5629556406771283, + "grad_norm": 1.2128755723830003, + "learning_rate": 1.722108875005527e-05, + "loss": 0.3531174957752228, + "step": 1139 + }, + { + "epoch": 0.5634498949709625, + "grad_norm": 0.9896343114056704, + "learning_rate": 1.7215434092848693e-05, + "loss": 0.32532358169555664, + "step": 1140 + }, + { + "epoch": 0.5639441492647967, + "grad_norm": 1.086973420033045, + "learning_rate": 1.7209774618721408e-05, + "loss": 0.3252495229244232, + "step": 1141 + }, + { + "epoch": 0.564438403558631, + "grad_norm": 1.1232225314649664, + "learning_rate": 1.7204110331451603e-05, + 
"loss": 0.35428208112716675, + "step": 1142 + }, + { + "epoch": 0.564932657852465, + "grad_norm": 1.165276028587328, + "learning_rate": 1.7198441234820674e-05, + "loss": 0.37419646978378296, + "step": 1143 + }, + { + "epoch": 0.5654269121462993, + "grad_norm": 1.1206339776354848, + "learning_rate": 1.7192767332613235e-05, + "loss": 0.3342249095439911, + "step": 1144 + }, + { + "epoch": 0.5659211664401335, + "grad_norm": 1.0700889667237288, + "learning_rate": 1.7187088628617093e-05, + "loss": 0.36827898025512695, + "step": 1145 + }, + { + "epoch": 0.5664154207339677, + "grad_norm": 1.1884715403984119, + "learning_rate": 1.7181405126623275e-05, + "loss": 0.3560858964920044, + "step": 1146 + }, + { + "epoch": 0.5669096750278018, + "grad_norm": 1.0578073497156413, + "learning_rate": 1.7175716830426005e-05, + "loss": 0.35333797335624695, + "step": 1147 + }, + { + "epoch": 0.567403929321636, + "grad_norm": 1.0504095801617317, + "learning_rate": 1.71700237438227e-05, + "loss": 0.31053799390792847, + "step": 1148 + }, + { + "epoch": 0.5678981836154702, + "grad_norm": 1.1443484208273471, + "learning_rate": 1.7164325870613998e-05, + "loss": 0.37123826146125793, + "step": 1149 + }, + { + "epoch": 0.5683924379093044, + "grad_norm": 1.069054169156011, + "learning_rate": 1.715862321460371e-05, + "loss": 0.33981990814208984, + "step": 1150 + }, + { + "epoch": 0.5688866922031385, + "grad_norm": 1.1295222791710222, + "learning_rate": 1.7152915779598846e-05, + "loss": 0.34938257932662964, + "step": 1151 + }, + { + "epoch": 0.5693809464969727, + "grad_norm": 1.10704413276648, + "learning_rate": 1.714720356940961e-05, + "loss": 0.3069387376308441, + "step": 1152 + }, + { + "epoch": 0.5698752007908069, + "grad_norm": 1.1206304490989205, + "learning_rate": 1.7141486587849397e-05, + "loss": 0.34879156947135925, + "step": 1153 + }, + { + "epoch": 0.5703694550846411, + "grad_norm": 1.140159647567344, + "learning_rate": 1.7135764838734773e-05, + "loss": 0.3624545931816101, + "step": 1154 + 
}, + { + "epoch": 0.5708637093784752, + "grad_norm": 1.0671159168894162, + "learning_rate": 1.7130038325885502e-05, + "loss": 0.3548320531845093, + "step": 1155 + }, + { + "epoch": 0.5713579636723094, + "grad_norm": 1.0469806768045702, + "learning_rate": 1.7124307053124518e-05, + "loss": 0.3004404902458191, + "step": 1156 + }, + { + "epoch": 0.5718522179661436, + "grad_norm": 1.1058227077648823, + "learning_rate": 1.7118571024277943e-05, + "loss": 0.31545472145080566, + "step": 1157 + }, + { + "epoch": 0.5723464722599778, + "grad_norm": 1.100412587450837, + "learning_rate": 1.711283024317506e-05, + "loss": 0.3116477429866791, + "step": 1158 + }, + { + "epoch": 0.5728407265538119, + "grad_norm": 1.1169526030822408, + "learning_rate": 1.710708471364834e-05, + "loss": 0.3472268581390381, + "step": 1159 + }, + { + "epoch": 0.5733349808476461, + "grad_norm": 1.1641407854241053, + "learning_rate": 1.7101334439533414e-05, + "loss": 0.33334046602249146, + "step": 1160 + }, + { + "epoch": 0.5738292351414803, + "grad_norm": 1.1720238639752558, + "learning_rate": 1.7095579424669074e-05, + "loss": 0.3462664783000946, + "step": 1161 + }, + { + "epoch": 0.5743234894353144, + "grad_norm": 1.0854325044336006, + "learning_rate": 1.7089819672897304e-05, + "loss": 0.3241977393627167, + "step": 1162 + }, + { + "epoch": 0.5748177437291486, + "grad_norm": 1.2501733360326688, + "learning_rate": 1.7084055188063217e-05, + "loss": 0.3194134533405304, + "step": 1163 + }, + { + "epoch": 0.5753119980229828, + "grad_norm": 1.1336053472715226, + "learning_rate": 1.7078285974015103e-05, + "loss": 0.3644179701805115, + "step": 1164 + }, + { + "epoch": 0.575806252316817, + "grad_norm": 1.1434067682408584, + "learning_rate": 1.7072512034604412e-05, + "loss": 0.36653730273246765, + "step": 1165 + }, + { + "epoch": 0.5763005066106511, + "grad_norm": 1.1221051792069954, + "learning_rate": 1.706673337368574e-05, + "loss": 0.3435714840888977, + "step": 1166 + }, + { + "epoch": 0.5767947609044853, + 
"grad_norm": 1.0603782757024258, + "learning_rate": 1.706094999511684e-05, + "loss": 0.36935871839523315, + "step": 1167 + }, + { + "epoch": 0.5772890151983195, + "grad_norm": 0.9845968090919184, + "learning_rate": 1.7055161902758607e-05, + "loss": 0.29493796825408936, + "step": 1168 + }, + { + "epoch": 0.5777832694921538, + "grad_norm": 1.0115254154804856, + "learning_rate": 1.70493691004751e-05, + "loss": 0.32378828525543213, + "step": 1169 + }, + { + "epoch": 0.5782775237859878, + "grad_norm": 1.1123861652198228, + "learning_rate": 1.70435715921335e-05, + "loss": 0.3587600588798523, + "step": 1170 + }, + { + "epoch": 0.578771778079822, + "grad_norm": 1.1091481408248292, + "learning_rate": 1.703776938160415e-05, + "loss": 0.31885826587677, + "step": 1171 + }, + { + "epoch": 0.5792660323736563, + "grad_norm": 1.0414979222224348, + "learning_rate": 1.7031962472760514e-05, + "loss": 0.2950041890144348, + "step": 1172 + }, + { + "epoch": 0.5797602866674905, + "grad_norm": 1.121100234384589, + "learning_rate": 1.7026150869479208e-05, + "loss": 0.36190298199653625, + "step": 1173 + }, + { + "epoch": 0.5802545409613246, + "grad_norm": 1.067632760047313, + "learning_rate": 1.7020334575639972e-05, + "loss": 0.3402514159679413, + "step": 1174 + }, + { + "epoch": 0.5807487952551588, + "grad_norm": 0.9679286148168113, + "learning_rate": 1.7014513595125684e-05, + "loss": 0.3131282925605774, + "step": 1175 + }, + { + "epoch": 0.581243049548993, + "grad_norm": 1.056786860676952, + "learning_rate": 1.7008687931822344e-05, + "loss": 0.29499226808547974, + "step": 1176 + }, + { + "epoch": 0.5817373038428272, + "grad_norm": 1.0712930292635054, + "learning_rate": 1.700285758961908e-05, + "loss": 0.36821871995925903, + "step": 1177 + }, + { + "epoch": 0.5822315581366613, + "grad_norm": 1.2780126948070993, + "learning_rate": 1.6997022572408152e-05, + "loss": 0.31486836075782776, + "step": 1178 + }, + { + "epoch": 0.5827258124304955, + "grad_norm": 1.0778384840117066, + 
"learning_rate": 1.6991182884084928e-05, + "loss": 0.3176078498363495, + "step": 1179 + }, + { + "epoch": 0.5832200667243297, + "grad_norm": 1.294300282858588, + "learning_rate": 1.69853385285479e-05, + "loss": 0.4130980968475342, + "step": 1180 + }, + { + "epoch": 0.5837143210181639, + "grad_norm": 1.103648457674251, + "learning_rate": 1.697948950969868e-05, + "loss": 0.3164641857147217, + "step": 1181 + }, + { + "epoch": 0.584208575311998, + "grad_norm": 1.1707357674613739, + "learning_rate": 1.697363583144199e-05, + "loss": 0.36420726776123047, + "step": 1182 + }, + { + "epoch": 0.5847028296058322, + "grad_norm": 1.1827091905189109, + "learning_rate": 1.696777749768566e-05, + "loss": 0.3279833197593689, + "step": 1183 + }, + { + "epoch": 0.5851970838996664, + "grad_norm": 1.2462082843052198, + "learning_rate": 1.696191451234063e-05, + "loss": 0.311473548412323, + "step": 1184 + }, + { + "epoch": 0.5856913381935006, + "grad_norm": 1.0514702517271486, + "learning_rate": 1.6956046879320943e-05, + "loss": 0.32284629344940186, + "step": 1185 + }, + { + "epoch": 0.5861855924873347, + "grad_norm": 1.081683685343838, + "learning_rate": 1.6950174602543753e-05, + "loss": 0.3318635821342468, + "step": 1186 + }, + { + "epoch": 0.5866798467811689, + "grad_norm": 1.10655975155716, + "learning_rate": 1.6944297685929298e-05, + "loss": 0.3268307149410248, + "step": 1187 + }, + { + "epoch": 0.5871741010750031, + "grad_norm": 1.1757413336808826, + "learning_rate": 1.6938416133400934e-05, + "loss": 0.31885889172554016, + "step": 1188 + }, + { + "epoch": 0.5876683553688372, + "grad_norm": 1.044019985672413, + "learning_rate": 1.69325299488851e-05, + "loss": 0.29273971915245056, + "step": 1189 + }, + { + "epoch": 0.5881626096626714, + "grad_norm": 1.2128861059808687, + "learning_rate": 1.692663913631132e-05, + "loss": 0.3585188388824463, + "step": 1190 + }, + { + "epoch": 0.5886568639565056, + "grad_norm": 1.152183266519285, + "learning_rate": 1.6920743699612226e-05, + "loss": 
0.37145692110061646, + "step": 1191 + }, + { + "epoch": 0.5891511182503398, + "grad_norm": 1.1211663085079848, + "learning_rate": 1.691484364272352e-05, + "loss": 0.34805262088775635, + "step": 1192 + }, + { + "epoch": 0.5896453725441739, + "grad_norm": 1.1094913177494823, + "learning_rate": 1.6908938969584002e-05, + "loss": 0.3540152907371521, + "step": 1193 + }, + { + "epoch": 0.5901396268380081, + "grad_norm": 1.1138288622940957, + "learning_rate": 1.6903029684135545e-05, + "loss": 0.35808512568473816, + "step": 1194 + }, + { + "epoch": 0.5906338811318423, + "grad_norm": 1.2028693910668573, + "learning_rate": 1.68971157903231e-05, + "loss": 0.2881169021129608, + "step": 1195 + }, + { + "epoch": 0.5911281354256765, + "grad_norm": 1.126509020875868, + "learning_rate": 1.6891197292094704e-05, + "loss": 0.33551955223083496, + "step": 1196 + }, + { + "epoch": 0.5916223897195106, + "grad_norm": 1.0141998416691063, + "learning_rate": 1.688527419340146e-05, + "loss": 0.30721622705459595, + "step": 1197 + }, + { + "epoch": 0.5921166440133449, + "grad_norm": 1.0876501850612135, + "learning_rate": 1.687934649819754e-05, + "loss": 0.3296341300010681, + "step": 1198 + }, + { + "epoch": 0.5926108983071791, + "grad_norm": 1.1194456964334092, + "learning_rate": 1.6873414210440194e-05, + "loss": 0.3511606454849243, + "step": 1199 + }, + { + "epoch": 0.5931051526010133, + "grad_norm": 1.0762712673108126, + "learning_rate": 1.6867477334089728e-05, + "loss": 0.34293919801712036, + "step": 1200 + }, + { + "epoch": 0.5935994068948474, + "grad_norm": 0.9942852659141888, + "learning_rate": 1.686153587310952e-05, + "loss": 0.3334580659866333, + "step": 1201 + }, + { + "epoch": 0.5940936611886816, + "grad_norm": 1.1354238373080972, + "learning_rate": 1.6855589831466e-05, + "loss": 0.3542851209640503, + "step": 1202 + }, + { + "epoch": 0.5945879154825158, + "grad_norm": 1.0952906678959344, + "learning_rate": 1.6849639213128667e-05, + "loss": 0.30951520800590515, + "step": 1203 + }, + { + 
"epoch": 0.59508216977635, + "grad_norm": 1.0716710567299268, + "learning_rate": 1.6843684022070062e-05, + "loss": 0.333478718996048, + "step": 1204 + }, + { + "epoch": 0.5955764240701841, + "grad_norm": 1.0944556204789582, + "learning_rate": 1.683772426226579e-05, + "loss": 0.33562588691711426, + "step": 1205 + }, + { + "epoch": 0.5960706783640183, + "grad_norm": 0.9136596878493712, + "learning_rate": 1.6831759937694497e-05, + "loss": 0.2626678943634033, + "step": 1206 + }, + { + "epoch": 0.5965649326578525, + "grad_norm": 1.1138721974001247, + "learning_rate": 1.6825791052337884e-05, + "loss": 0.349543035030365, + "step": 1207 + }, + { + "epoch": 0.5970591869516867, + "grad_norm": 1.0760285856821303, + "learning_rate": 1.6819817610180696e-05, + "loss": 0.3229057788848877, + "step": 1208 + }, + { + "epoch": 0.5975534412455208, + "grad_norm": 1.0511960959262137, + "learning_rate": 1.681383961521071e-05, + "loss": 0.32023823261260986, + "step": 1209 + }, + { + "epoch": 0.598047695539355, + "grad_norm": 1.0122201188951288, + "learning_rate": 1.680785707141876e-05, + "loss": 0.31556791067123413, + "step": 1210 + }, + { + "epoch": 0.5985419498331892, + "grad_norm": 1.1858949236151264, + "learning_rate": 1.68018699827987e-05, + "loss": 0.33287158608436584, + "step": 1211 + }, + { + "epoch": 0.5990362041270234, + "grad_norm": 1.0276520854994282, + "learning_rate": 1.6795878353347427e-05, + "loss": 0.28690433502197266, + "step": 1212 + }, + { + "epoch": 0.5995304584208575, + "grad_norm": 1.1202382723881081, + "learning_rate": 1.6789882187064862e-05, + "loss": 0.3501484990119934, + "step": 1213 + }, + { + "epoch": 0.6000247127146917, + "grad_norm": 1.15016872261832, + "learning_rate": 1.678388148795397e-05, + "loss": 0.3645259439945221, + "step": 1214 + }, + { + "epoch": 0.6005189670085259, + "grad_norm": 1.0232559071014062, + "learning_rate": 1.6777876260020726e-05, + "loss": 0.3270183801651001, + "step": 1215 + }, + { + "epoch": 0.60101322130236, + "grad_norm": 
1.0680433488207848, + "learning_rate": 1.6771866507274132e-05, + "loss": 0.31767967343330383, + "step": 1216 + }, + { + "epoch": 0.6015074755961942, + "grad_norm": 1.0642272352631703, + "learning_rate": 1.6765852233726216e-05, + "loss": 0.3170120120048523, + "step": 1217 + }, + { + "epoch": 0.6020017298900284, + "grad_norm": 1.0689193394735252, + "learning_rate": 1.6759833443392022e-05, + "loss": 0.3270176351070404, + "step": 1218 + }, + { + "epoch": 0.6024959841838626, + "grad_norm": 1.0053062396233938, + "learning_rate": 1.6753810140289608e-05, + "loss": 0.3229079246520996, + "step": 1219 + }, + { + "epoch": 0.6029902384776967, + "grad_norm": 1.060220470914707, + "learning_rate": 1.6747782328440044e-05, + "loss": 0.3366449773311615, + "step": 1220 + }, + { + "epoch": 0.6034844927715309, + "grad_norm": 1.2656940979343048, + "learning_rate": 1.674175001186741e-05, + "loss": 0.4027010500431061, + "step": 1221 + }, + { + "epoch": 0.6039787470653651, + "grad_norm": 1.039989374871811, + "learning_rate": 1.6735713194598798e-05, + "loss": 0.31566083431243896, + "step": 1222 + }, + { + "epoch": 0.6044730013591993, + "grad_norm": 1.1667815915058346, + "learning_rate": 1.67296718806643e-05, + "loss": 0.3361780047416687, + "step": 1223 + }, + { + "epoch": 0.6049672556530334, + "grad_norm": 1.0628494144880791, + "learning_rate": 1.6723626074097007e-05, + "loss": 0.3197939693927765, + "step": 1224 + }, + { + "epoch": 0.6054615099468676, + "grad_norm": 1.078571350485402, + "learning_rate": 1.671757577893302e-05, + "loss": 0.32977360486984253, + "step": 1225 + }, + { + "epoch": 0.6059557642407019, + "grad_norm": 1.1192119082687915, + "learning_rate": 1.671152099921142e-05, + "loss": 0.3434401750564575, + "step": 1226 + }, + { + "epoch": 0.6064500185345361, + "grad_norm": 1.0664877094913836, + "learning_rate": 1.67054617389743e-05, + "loss": 0.33856305480003357, + "step": 1227 + }, + { + "epoch": 0.6069442728283702, + "grad_norm": 1.147959053573069, + "learning_rate": 
1.669939800226673e-05, + "loss": 0.31594911217689514, + "step": 1228 + }, + { + "epoch": 0.6074385271222044, + "grad_norm": 1.105417739927691, + "learning_rate": 1.669332979313678e-05, + "loss": 0.32347679138183594, + "step": 1229 + }, + { + "epoch": 0.6079327814160386, + "grad_norm": 1.1057400329817928, + "learning_rate": 1.6687257115635492e-05, + "loss": 0.32733607292175293, + "step": 1230 + }, + { + "epoch": 0.6084270357098728, + "grad_norm": 0.9869005136013326, + "learning_rate": 1.6681179973816908e-05, + "loss": 0.306827187538147, + "step": 1231 + }, + { + "epoch": 0.6089212900037069, + "grad_norm": 1.068802395839477, + "learning_rate": 1.667509837173803e-05, + "loss": 0.3515884280204773, + "step": 1232 + }, + { + "epoch": 0.6094155442975411, + "grad_norm": 1.0062662165973097, + "learning_rate": 1.6669012313458862e-05, + "loss": 0.28699082136154175, + "step": 1233 + }, + { + "epoch": 0.6099097985913753, + "grad_norm": 1.0697164166178312, + "learning_rate": 1.6662921803042356e-05, + "loss": 0.30737537145614624, + "step": 1234 + }, + { + "epoch": 0.6104040528852095, + "grad_norm": 1.0782793991023802, + "learning_rate": 1.665682684455446e-05, + "loss": 0.3193345069885254, + "step": 1235 + }, + { + "epoch": 0.6108983071790436, + "grad_norm": 1.1629258901733988, + "learning_rate": 1.6650727442064073e-05, + "loss": 0.3326336741447449, + "step": 1236 + }, + { + "epoch": 0.6113925614728778, + "grad_norm": 1.0950813589125916, + "learning_rate": 1.6644623599643076e-05, + "loss": 0.2967267632484436, + "step": 1237 + }, + { + "epoch": 0.611886815766712, + "grad_norm": 1.104366364956542, + "learning_rate": 1.66385153213663e-05, + "loss": 0.3163914084434509, + "step": 1238 + }, + { + "epoch": 0.6123810700605461, + "grad_norm": 1.1913476484695409, + "learning_rate": 1.663240261131155e-05, + "loss": 0.40281808376312256, + "step": 1239 + }, + { + "epoch": 0.6128753243543803, + "grad_norm": 1.1744917859448287, + "learning_rate": 1.6626285473559586e-05, + "loss": 
0.33946287631988525, + "step": 1240 + }, + { + "epoch": 0.6133695786482145, + "grad_norm": 1.121011060895708, + "learning_rate": 1.6620163912194114e-05, + "loss": 0.3750913143157959, + "step": 1241 + }, + { + "epoch": 0.6138638329420487, + "grad_norm": 1.1601773319994575, + "learning_rate": 1.6614037931301804e-05, + "loss": 0.32449400424957275, + "step": 1242 + }, + { + "epoch": 0.6143580872358828, + "grad_norm": 1.146035054497973, + "learning_rate": 1.6607907534972277e-05, + "loss": 0.3484799861907959, + "step": 1243 + }, + { + "epoch": 0.614852341529717, + "grad_norm": 1.0478699674323781, + "learning_rate": 1.6601772727298095e-05, + "loss": 0.2991127669811249, + "step": 1244 + }, + { + "epoch": 0.6153465958235512, + "grad_norm": 1.0941316253076903, + "learning_rate": 1.6595633512374768e-05, + "loss": 0.339094340801239, + "step": 1245 + }, + { + "epoch": 0.6158408501173854, + "grad_norm": 1.0756027047064132, + "learning_rate": 1.6589489894300744e-05, + "loss": 0.3147842288017273, + "step": 1246 + }, + { + "epoch": 0.6163351044112195, + "grad_norm": 1.0944450465347566, + "learning_rate": 1.6583341877177427e-05, + "loss": 0.3036183714866638, + "step": 1247 + }, + { + "epoch": 0.6168293587050537, + "grad_norm": 1.0983853525092009, + "learning_rate": 1.657718946510913e-05, + "loss": 0.32657095789909363, + "step": 1248 + }, + { + "epoch": 0.6173236129988879, + "grad_norm": 1.0660730573251251, + "learning_rate": 1.6571032662203126e-05, + "loss": 0.3104664385318756, + "step": 1249 + }, + { + "epoch": 0.6178178672927221, + "grad_norm": 1.0675015064613533, + "learning_rate": 1.6564871472569604e-05, + "loss": 0.30392807722091675, + "step": 1250 + }, + { + "epoch": 0.6183121215865562, + "grad_norm": 1.080894190005694, + "learning_rate": 1.655870590032169e-05, + "loss": 0.3087356388568878, + "step": 1251 + }, + { + "epoch": 0.6188063758803904, + "grad_norm": 1.0633256442775108, + "learning_rate": 1.6552535949575427e-05, + "loss": 0.3220480978488922, + "step": 1252 + }, + { + 
"epoch": 0.6193006301742247, + "grad_norm": 1.0867949301055795, + "learning_rate": 1.654636162444979e-05, + "loss": 0.33925485610961914, + "step": 1253 + }, + { + "epoch": 0.6197948844680589, + "grad_norm": 1.0651223448844926, + "learning_rate": 1.6540182929066667e-05, + "loss": 0.3704617917537689, + "step": 1254 + }, + { + "epoch": 0.620289138761893, + "grad_norm": 1.1158405395395257, + "learning_rate": 1.653399986755087e-05, + "loss": 0.33745670318603516, + "step": 1255 + }, + { + "epoch": 0.6207833930557272, + "grad_norm": 1.1397943957058634, + "learning_rate": 1.6527812444030118e-05, + "loss": 0.31651467084884644, + "step": 1256 + }, + { + "epoch": 0.6212776473495614, + "grad_norm": 1.141112365152985, + "learning_rate": 1.6521620662635053e-05, + "loss": 0.360455185174942, + "step": 1257 + }, + { + "epoch": 0.6217719016433956, + "grad_norm": 1.0000307812773819, + "learning_rate": 1.6515424527499214e-05, + "loss": 0.32819390296936035, + "step": 1258 + }, + { + "epoch": 0.6222661559372297, + "grad_norm": 1.229539015248975, + "learning_rate": 1.6509224042759053e-05, + "loss": 0.38759690523147583, + "step": 1259 + }, + { + "epoch": 0.6227604102310639, + "grad_norm": 1.127403937815861, + "learning_rate": 1.6503019212553932e-05, + "loss": 0.34250545501708984, + "step": 1260 + }, + { + "epoch": 0.6232546645248981, + "grad_norm": 1.0060644367410545, + "learning_rate": 1.6496810041026097e-05, + "loss": 0.3120163679122925, + "step": 1261 + }, + { + "epoch": 0.6237489188187323, + "grad_norm": 1.1050188267024101, + "learning_rate": 1.649059653232071e-05, + "loss": 0.35985836386680603, + "step": 1262 + }, + { + "epoch": 0.6242431731125664, + "grad_norm": 1.0877426950647728, + "learning_rate": 1.648437869058581e-05, + "loss": 0.3551288843154907, + "step": 1263 + }, + { + "epoch": 0.6247374274064006, + "grad_norm": 1.095568415742879, + "learning_rate": 1.6478156519972354e-05, + "loss": 0.33047816157341003, + "step": 1264 + }, + { + "epoch": 0.6252316817002348, + "grad_norm": 
1.0643242802432207, + "learning_rate": 1.6471930024634164e-05, + "loss": 0.32909417152404785, + "step": 1265 + }, + { + "epoch": 0.6257259359940689, + "grad_norm": 1.07195158812182, + "learning_rate": 1.6465699208727964e-05, + "loss": 0.3726924657821655, + "step": 1266 + }, + { + "epoch": 0.6262201902879031, + "grad_norm": 1.1316893144153, + "learning_rate": 1.6459464076413355e-05, + "loss": 0.3569204807281494, + "step": 1267 + }, + { + "epoch": 0.6267144445817373, + "grad_norm": 1.0125649890138406, + "learning_rate": 1.6453224631852825e-05, + "loss": 0.33798107504844666, + "step": 1268 + }, + { + "epoch": 0.6272086988755715, + "grad_norm": 1.1537944647220344, + "learning_rate": 1.644698087921173e-05, + "loss": 0.32891637086868286, + "step": 1269 + }, + { + "epoch": 0.6277029531694056, + "grad_norm": 1.1246833616649612, + "learning_rate": 1.644073282265832e-05, + "loss": 0.31512969732284546, + "step": 1270 + }, + { + "epoch": 0.6281972074632398, + "grad_norm": 1.1199823464164773, + "learning_rate": 1.643448046636371e-05, + "loss": 0.350041925907135, + "step": 1271 + }, + { + "epoch": 0.628691461757074, + "grad_norm": 1.0925989435954497, + "learning_rate": 1.642822381450187e-05, + "loss": 0.3248854875564575, + "step": 1272 + }, + { + "epoch": 0.6291857160509082, + "grad_norm": 1.0344569444697491, + "learning_rate": 1.6421962871249662e-05, + "loss": 0.3031661808490753, + "step": 1273 + }, + { + "epoch": 0.6296799703447423, + "grad_norm": 1.0843035546126185, + "learning_rate": 1.6415697640786802e-05, + "loss": 0.2903754711151123, + "step": 1274 + }, + { + "epoch": 0.6301742246385765, + "grad_norm": 1.0122518499053432, + "learning_rate": 1.6409428127295864e-05, + "loss": 0.300454318523407, + "step": 1275 + }, + { + "epoch": 0.6306684789324107, + "grad_norm": 1.0842968830814483, + "learning_rate": 1.6403154334962286e-05, + "loss": 0.3430244028568268, + "step": 1276 + }, + { + "epoch": 0.6311627332262449, + "grad_norm": 1.1383634793407482, + "learning_rate": 
1.6396876267974367e-05, + "loss": 0.3728436827659607, + "step": 1277 + }, + { + "epoch": 0.631656987520079, + "grad_norm": 1.103371729978927, + "learning_rate": 1.639059393052325e-05, + "loss": 0.3021183907985687, + "step": 1278 + }, + { + "epoch": 0.6321512418139132, + "grad_norm": 1.0649900935701406, + "learning_rate": 1.6384307326802934e-05, + "loss": 0.3313615918159485, + "step": 1279 + }, + { + "epoch": 0.6326454961077475, + "grad_norm": 1.0519110395000262, + "learning_rate": 1.637801646101027e-05, + "loss": 0.32833239436149597, + "step": 1280 + }, + { + "epoch": 0.6331397504015817, + "grad_norm": 1.1672616485147485, + "learning_rate": 1.6371721337344947e-05, + "loss": 0.3575769066810608, + "step": 1281 + }, + { + "epoch": 0.6336340046954158, + "grad_norm": 1.044512245658177, + "learning_rate": 1.6365421960009502e-05, + "loss": 0.33323729038238525, + "step": 1282 + }, + { + "epoch": 0.63412825898925, + "grad_norm": 1.150185694461945, + "learning_rate": 1.6359118333209307e-05, + "loss": 0.3522900938987732, + "step": 1283 + }, + { + "epoch": 0.6346225132830842, + "grad_norm": 1.2143932108960407, + "learning_rate": 1.635281046115257e-05, + "loss": 0.3350796699523926, + "step": 1284 + }, + { + "epoch": 0.6351167675769184, + "grad_norm": 1.2071815938700088, + "learning_rate": 1.6346498348050342e-05, + "loss": 0.350632905960083, + "step": 1285 + }, + { + "epoch": 0.6356110218707525, + "grad_norm": 1.0108749382306044, + "learning_rate": 1.6340181998116494e-05, + "loss": 0.2961253523826599, + "step": 1286 + }, + { + "epoch": 0.6361052761645867, + "grad_norm": 1.3686468141070485, + "learning_rate": 1.6333861415567736e-05, + "loss": 0.35736170411109924, + "step": 1287 + }, + { + "epoch": 0.6365995304584209, + "grad_norm": 1.1749750672779442, + "learning_rate": 1.63275366046236e-05, + "loss": 0.35654571652412415, + "step": 1288 + }, + { + "epoch": 0.6370937847522551, + "grad_norm": 1.0658003578898634, + "learning_rate": 1.6321207569506435e-05, + "loss": 
0.30518224835395813, + "step": 1289 + }, + { + "epoch": 0.6375880390460892, + "grad_norm": 1.1007851387105425, + "learning_rate": 1.6314874314441413e-05, + "loss": 0.35099470615386963, + "step": 1290 + }, + { + "epoch": 0.6380822933399234, + "grad_norm": 1.0971286067217327, + "learning_rate": 1.6308536843656528e-05, + "loss": 0.3577536344528198, + "step": 1291 + }, + { + "epoch": 0.6385765476337576, + "grad_norm": 1.0395121014513669, + "learning_rate": 1.6302195161382586e-05, + "loss": 0.3141167163848877, + "step": 1292 + }, + { + "epoch": 0.6390708019275917, + "grad_norm": 0.981608659730199, + "learning_rate": 1.62958492718532e-05, + "loss": 0.2920055389404297, + "step": 1293 + }, + { + "epoch": 0.6395650562214259, + "grad_norm": 1.0875768517352407, + "learning_rate": 1.6289499179304797e-05, + "loss": 0.32826486229896545, + "step": 1294 + }, + { + "epoch": 0.6400593105152601, + "grad_norm": 1.0051851075633542, + "learning_rate": 1.628314488797661e-05, + "loss": 0.3080480992794037, + "step": 1295 + }, + { + "epoch": 0.6405535648090943, + "grad_norm": 1.006537470660458, + "learning_rate": 1.627678640211067e-05, + "loss": 0.304529070854187, + "step": 1296 + }, + { + "epoch": 0.6410478191029284, + "grad_norm": 1.1108978139615113, + "learning_rate": 1.627042372595181e-05, + "loss": 0.34653496742248535, + "step": 1297 + }, + { + "epoch": 0.6415420733967626, + "grad_norm": 0.9745027779333038, + "learning_rate": 1.6264056863747667e-05, + "loss": 0.2938673496246338, + "step": 1298 + }, + { + "epoch": 0.6420363276905968, + "grad_norm": 1.1585281714148792, + "learning_rate": 1.625768581974866e-05, + "loss": 0.32350343465805054, + "step": 1299 + }, + { + "epoch": 0.642530581984431, + "grad_norm": 1.0756982630474194, + "learning_rate": 1.6251310598208015e-05, + "loss": 0.3175384998321533, + "step": 1300 + }, + { + "epoch": 0.6430248362782651, + "grad_norm": 1.1335110071944674, + "learning_rate": 1.6244931203381734e-05, + "loss": 0.32667648792266846, + "step": 1301 + }, + { + 
"epoch": 0.6435190905720993, + "grad_norm": 0.9986052180267636, + "learning_rate": 1.623854763952861e-05, + "loss": 0.30110976099967957, + "step": 1302 + }, + { + "epoch": 0.6440133448659335, + "grad_norm": 1.2219754266907614, + "learning_rate": 1.6232159910910224e-05, + "loss": 0.3508617579936981, + "step": 1303 + }, + { + "epoch": 0.6445075991597677, + "grad_norm": 1.1027211796126624, + "learning_rate": 1.622576802179092e-05, + "loss": 0.34416183829307556, + "step": 1304 + }, + { + "epoch": 0.6450018534536018, + "grad_norm": 1.1267200023483468, + "learning_rate": 1.6219371976437847e-05, + "loss": 0.3509306311607361, + "step": 1305 + }, + { + "epoch": 0.645496107747436, + "grad_norm": 1.1746524244290708, + "learning_rate": 1.6212971779120904e-05, + "loss": 0.36186683177948, + "step": 1306 + }, + { + "epoch": 0.6459903620412702, + "grad_norm": 1.128374133277422, + "learning_rate": 1.6206567434112776e-05, + "loss": 0.3123924732208252, + "step": 1307 + }, + { + "epoch": 0.6464846163351045, + "grad_norm": 1.2141772034453755, + "learning_rate": 1.6200158945688907e-05, + "loss": 0.3691411018371582, + "step": 1308 + }, + { + "epoch": 0.6469788706289386, + "grad_norm": 1.1011618758034853, + "learning_rate": 1.6193746318127516e-05, + "loss": 0.3136986792087555, + "step": 1309 + }, + { + "epoch": 0.6474731249227728, + "grad_norm": 1.0883839992045683, + "learning_rate": 1.6187329555709585e-05, + "loss": 0.30374211072921753, + "step": 1310 + }, + { + "epoch": 0.647967379216607, + "grad_norm": 1.207837369942263, + "learning_rate": 1.618090866271884e-05, + "loss": 0.3633323907852173, + "step": 1311 + }, + { + "epoch": 0.6484616335104412, + "grad_norm": 1.056749654034174, + "learning_rate": 1.6174483643441795e-05, + "loss": 0.31395208835601807, + "step": 1312 + }, + { + "epoch": 0.6489558878042753, + "grad_norm": 1.0312943002596973, + "learning_rate": 1.6168054502167687e-05, + "loss": 0.29258471727371216, + "step": 1313 + }, + { + "epoch": 0.6494501420981095, + "grad_norm": 
1.052844702612926, + "learning_rate": 1.6161621243188528e-05, + "loss": 0.3086007833480835, + "step": 1314 + }, + { + "epoch": 0.6499443963919437, + "grad_norm": 1.1099907156572013, + "learning_rate": 1.6155183870799063e-05, + "loss": 0.3604614734649658, + "step": 1315 + }, + { + "epoch": 0.6504386506857779, + "grad_norm": 1.230657559418624, + "learning_rate": 1.614874238929679e-05, + "loss": 0.3784678876399994, + "step": 1316 + }, + { + "epoch": 0.650932904979612, + "grad_norm": 0.9692609071600233, + "learning_rate": 1.6142296802981957e-05, + "loss": 0.29009610414505005, + "step": 1317 + }, + { + "epoch": 0.6514271592734462, + "grad_norm": 1.1385261282180998, + "learning_rate": 1.6135847116157542e-05, + "loss": 0.3667104244232178, + "step": 1318 + }, + { + "epoch": 0.6519214135672804, + "grad_norm": 1.0454111919656257, + "learning_rate": 1.6129393333129262e-05, + "loss": 0.3100985884666443, + "step": 1319 + }, + { + "epoch": 0.6524156678611145, + "grad_norm": 1.0967001531345488, + "learning_rate": 1.612293545820557e-05, + "loss": 0.34128522872924805, + "step": 1320 + }, + { + "epoch": 0.6529099221549487, + "grad_norm": 1.016572733864691, + "learning_rate": 1.611647349569765e-05, + "loss": 0.3017216920852661, + "step": 1321 + }, + { + "epoch": 0.6534041764487829, + "grad_norm": 1.0979244854260226, + "learning_rate": 1.611000744991942e-05, + "loss": 0.35060590505599976, + "step": 1322 + }, + { + "epoch": 0.6538984307426171, + "grad_norm": 1.180855026456707, + "learning_rate": 1.610353732518752e-05, + "loss": 0.3766549825668335, + "step": 1323 + }, + { + "epoch": 0.6543926850364512, + "grad_norm": 0.9954937284294141, + "learning_rate": 1.609706312582131e-05, + "loss": 0.2970678210258484, + "step": 1324 + }, + { + "epoch": 0.6548869393302854, + "grad_norm": 1.2407304893003468, + "learning_rate": 1.609058485614287e-05, + "loss": 0.3345789909362793, + "step": 1325 + }, + { + "epoch": 0.6553811936241196, + "grad_norm": 1.159801774337048, + "learning_rate": 
1.608410252047701e-05, + "loss": 0.34838157892227173, + "step": 1326 + }, + { + "epoch": 0.6558754479179538, + "grad_norm": 1.052743453114199, + "learning_rate": 1.6077616123151232e-05, + "loss": 0.27454087138175964, + "step": 1327 + }, + { + "epoch": 0.6563697022117879, + "grad_norm": 1.1304513457691607, + "learning_rate": 1.607112566849577e-05, + "loss": 0.3372647762298584, + "step": 1328 + }, + { + "epoch": 0.6568639565056221, + "grad_norm": 1.1678098502989476, + "learning_rate": 1.606463116084356e-05, + "loss": 0.34433993697166443, + "step": 1329 + }, + { + "epoch": 0.6573582107994563, + "grad_norm": 1.0760327464429003, + "learning_rate": 1.6058132604530242e-05, + "loss": 0.3267759382724762, + "step": 1330 + }, + { + "epoch": 0.6578524650932905, + "grad_norm": 1.044029067228307, + "learning_rate": 1.6051630003894155e-05, + "loss": 0.3022347390651703, + "step": 1331 + }, + { + "epoch": 0.6583467193871246, + "grad_norm": 1.0701124312590375, + "learning_rate": 1.604512336327634e-05, + "loss": 0.32478266954421997, + "step": 1332 + }, + { + "epoch": 0.6588409736809588, + "grad_norm": 1.1194211733981758, + "learning_rate": 1.6038612687020548e-05, + "loss": 0.32039204239845276, + "step": 1333 + }, + { + "epoch": 0.659335227974793, + "grad_norm": 1.189072572166891, + "learning_rate": 1.6032097979473203e-05, + "loss": 0.3376410901546478, + "step": 1334 + }, + { + "epoch": 0.6598294822686273, + "grad_norm": 1.0209465387535948, + "learning_rate": 1.6025579244983443e-05, + "loss": 0.28432029485702515, + "step": 1335 + }, + { + "epoch": 0.6603237365624613, + "grad_norm": 1.1101085579973957, + "learning_rate": 1.6019056487903067e-05, + "loss": 0.3349001109600067, + "step": 1336 + }, + { + "epoch": 0.6608179908562956, + "grad_norm": 1.016991018325495, + "learning_rate": 1.601252971258658e-05, + "loss": 0.27995598316192627, + "step": 1337 + }, + { + "epoch": 0.6613122451501298, + "grad_norm": 1.0652875110729838, + "learning_rate": 1.6005998923391172e-05, + "loss": 
0.28326892852783203, + "step": 1338 + }, + { + "epoch": 0.661806499443964, + "grad_norm": 1.1089400050162956, + "learning_rate": 1.5999464124676697e-05, + "loss": 0.3139200806617737, + "step": 1339 + }, + { + "epoch": 0.6623007537377981, + "grad_norm": 1.0857703956199403, + "learning_rate": 1.5992925320805688e-05, + "loss": 0.32395505905151367, + "step": 1340 + }, + { + "epoch": 0.6627950080316323, + "grad_norm": 1.187400707476865, + "learning_rate": 1.598638251614337e-05, + "loss": 0.35880255699157715, + "step": 1341 + }, + { + "epoch": 0.6632892623254665, + "grad_norm": 1.1264632686384342, + "learning_rate": 1.5979835715057616e-05, + "loss": 0.3696775436401367, + "step": 1342 + }, + { + "epoch": 0.6637835166193007, + "grad_norm": 1.2084738763641774, + "learning_rate": 1.597328492191898e-05, + "loss": 0.38413193821907043, + "step": 1343 + }, + { + "epoch": 0.6642777709131348, + "grad_norm": 2.0572947223290017, + "learning_rate": 1.596673014110068e-05, + "loss": 0.3564830720424652, + "step": 1344 + }, + { + "epoch": 0.664772025206969, + "grad_norm": 1.0170026931569898, + "learning_rate": 1.5960171376978587e-05, + "loss": 0.30634552240371704, + "step": 1345 + }, + { + "epoch": 0.6652662795008032, + "grad_norm": 1.0375692111937291, + "learning_rate": 1.595360863393125e-05, + "loss": 0.27113068103790283, + "step": 1346 + }, + { + "epoch": 0.6657605337946373, + "grad_norm": 1.242773829739391, + "learning_rate": 1.594704191633985e-05, + "loss": 0.34015512466430664, + "step": 1347 + }, + { + "epoch": 0.6662547880884715, + "grad_norm": 0.9724222230737607, + "learning_rate": 1.594047122858824e-05, + "loss": 0.2509229779243469, + "step": 1348 + }, + { + "epoch": 0.6667490423823057, + "grad_norm": 1.0705371704599513, + "learning_rate": 1.5933896575062922e-05, + "loss": 0.35122111439704895, + "step": 1349 + }, + { + "epoch": 0.6672432966761399, + "grad_norm": 1.0469402955634624, + "learning_rate": 1.592731796015303e-05, + "loss": 0.3656314015388489, + "step": 1350 + }, + { + 
"epoch": 0.667737550969974, + "grad_norm": 1.0980190562444532, + "learning_rate": 1.5920735388250363e-05, + "loss": 0.3482551574707031, + "step": 1351 + }, + { + "epoch": 0.6682318052638082, + "grad_norm": 0.9987728958846398, + "learning_rate": 1.5914148863749344e-05, + "loss": 0.2852175831794739, + "step": 1352 + }, + { + "epoch": 0.6687260595576424, + "grad_norm": 1.1231968462948256, + "learning_rate": 1.590755839104705e-05, + "loss": 0.3435940742492676, + "step": 1353 + }, + { + "epoch": 0.6692203138514766, + "grad_norm": 1.2334019463480403, + "learning_rate": 1.590096397454318e-05, + "loss": 0.34816527366638184, + "step": 1354 + }, + { + "epoch": 0.6697145681453107, + "grad_norm": 1.4472355399081582, + "learning_rate": 1.5894365618640077e-05, + "loss": 0.3283170461654663, + "step": 1355 + }, + { + "epoch": 0.6702088224391449, + "grad_norm": 1.1520168978191874, + "learning_rate": 1.588776332774271e-05, + "loss": 0.335905522108078, + "step": 1356 + }, + { + "epoch": 0.6707030767329791, + "grad_norm": 1.1244736910598108, + "learning_rate": 1.5881157106258666e-05, + "loss": 0.3055316209793091, + "step": 1357 + }, + { + "epoch": 0.6711973310268133, + "grad_norm": 1.050666765324263, + "learning_rate": 1.5874546958598172e-05, + "loss": 0.2873142659664154, + "step": 1358 + }, + { + "epoch": 0.6716915853206474, + "grad_norm": 1.0218331884680711, + "learning_rate": 1.586793288917406e-05, + "loss": 0.29659712314605713, + "step": 1359 + }, + { + "epoch": 0.6721858396144816, + "grad_norm": 1.0827802259474617, + "learning_rate": 1.5861314902401802e-05, + "loss": 0.33081990480422974, + "step": 1360 + }, + { + "epoch": 0.6726800939083158, + "grad_norm": 1.2140107638410536, + "learning_rate": 1.5854693002699457e-05, + "loss": 0.3559015691280365, + "step": 1361 + }, + { + "epoch": 0.67317434820215, + "grad_norm": 1.1424828520826207, + "learning_rate": 1.584806719448772e-05, + "loss": 0.3353438973426819, + "step": 1362 + }, + { + "epoch": 0.6736686024959841, + "grad_norm": 
1.0533009951881467, + "learning_rate": 1.5841437482189882e-05, + "loss": 0.3320685923099518, + "step": 1363 + }, + { + "epoch": 0.6741628567898184, + "grad_norm": 1.0600254033440624, + "learning_rate": 1.5834803870231846e-05, + "loss": 0.3070179224014282, + "step": 1364 + }, + { + "epoch": 0.6746571110836526, + "grad_norm": 1.0452219544938475, + "learning_rate": 1.5828166363042115e-05, + "loss": 0.28779780864715576, + "step": 1365 + }, + { + "epoch": 0.6751513653774868, + "grad_norm": 0.9932658974656241, + "learning_rate": 1.5821524965051793e-05, + "loss": 0.2793114185333252, + "step": 1366 + }, + { + "epoch": 0.6756456196713209, + "grad_norm": 1.117744874079583, + "learning_rate": 1.5814879680694585e-05, + "loss": 0.3586357831954956, + "step": 1367 + }, + { + "epoch": 0.6761398739651551, + "grad_norm": 1.122494918770383, + "learning_rate": 1.5808230514406786e-05, + "loss": 0.35258832573890686, + "step": 1368 + }, + { + "epoch": 0.6766341282589893, + "grad_norm": 1.0624893424167818, + "learning_rate": 1.5801577470627286e-05, + "loss": 0.2783607840538025, + "step": 1369 + }, + { + "epoch": 0.6771283825528235, + "grad_norm": 1.217710803865883, + "learning_rate": 1.579492055379756e-05, + "loss": 0.3494858741760254, + "step": 1370 + }, + { + "epoch": 0.6776226368466576, + "grad_norm": 1.1913846811426898, + "learning_rate": 1.578825976836167e-05, + "loss": 0.34512561559677124, + "step": 1371 + }, + { + "epoch": 0.6781168911404918, + "grad_norm": 1.0303182849177774, + "learning_rate": 1.5781595118766265e-05, + "loss": 0.2923341989517212, + "step": 1372 + }, + { + "epoch": 0.678611145434326, + "grad_norm": 1.0423481220482165, + "learning_rate": 1.5774926609460566e-05, + "loss": 0.3078833818435669, + "step": 1373 + }, + { + "epoch": 0.6791053997281601, + "grad_norm": 1.0871141007271816, + "learning_rate": 1.576825424489638e-05, + "loss": 0.3147008419036865, + "step": 1374 + }, + { + "epoch": 0.6795996540219943, + "grad_norm": 1.0340836184197277, + "learning_rate": 
1.576157802952807e-05, + "loss": 0.2907789349555969, + "step": 1375 + }, + { + "epoch": 0.6800939083158285, + "grad_norm": 1.1801114991913197, + "learning_rate": 1.57548979678126e-05, + "loss": 0.2941555976867676, + "step": 1376 + }, + { + "epoch": 0.6805881626096627, + "grad_norm": 1.137398706652914, + "learning_rate": 1.5748214064209473e-05, + "loss": 0.3452342748641968, + "step": 1377 + }, + { + "epoch": 0.6810824169034968, + "grad_norm": 0.9870368606552603, + "learning_rate": 1.5741526323180765e-05, + "loss": 0.31481361389160156, + "step": 1378 + }, + { + "epoch": 0.681576671197331, + "grad_norm": 1.1734004344416635, + "learning_rate": 1.573483474919112e-05, + "loss": 0.3403349220752716, + "step": 1379 + }, + { + "epoch": 0.6820709254911652, + "grad_norm": 1.3661262290783491, + "learning_rate": 1.572813934670774e-05, + "loss": 0.3283364176750183, + "step": 1380 + }, + { + "epoch": 0.6825651797849994, + "grad_norm": 1.0790334315781973, + "learning_rate": 1.5721440120200376e-05, + "loss": 0.3294883966445923, + "step": 1381 + }, + { + "epoch": 0.6830594340788335, + "grad_norm": 1.057215667272423, + "learning_rate": 1.5714737074141338e-05, + "loss": 0.3087981343269348, + "step": 1382 + }, + { + "epoch": 0.6835536883726677, + "grad_norm": 0.9953380542206125, + "learning_rate": 1.570803021300548e-05, + "loss": 0.29511693120002747, + "step": 1383 + }, + { + "epoch": 0.6840479426665019, + "grad_norm": 1.1147415286539601, + "learning_rate": 1.570131954127021e-05, + "loss": 0.3620823323726654, + "step": 1384 + }, + { + "epoch": 0.6845421969603361, + "grad_norm": 1.2518358127130127, + "learning_rate": 1.5694605063415477e-05, + "loss": 0.3978300988674164, + "step": 1385 + }, + { + "epoch": 0.6850364512541702, + "grad_norm": 1.2104388988265296, + "learning_rate": 1.5687886783923773e-05, + "loss": 0.35367661714553833, + "step": 1386 + }, + { + "epoch": 0.6855307055480044, + "grad_norm": 1.158470270474232, + "learning_rate": 1.5681164707280117e-05, + "loss": 
0.3313448131084442, + "step": 1387 + }, + { + "epoch": 0.6860249598418386, + "grad_norm": 1.1312206183637163, + "learning_rate": 1.5674438837972077e-05, + "loss": 0.34115713834762573, + "step": 1388 + }, + { + "epoch": 0.6865192141356729, + "grad_norm": 1.071906380475402, + "learning_rate": 1.566770918048975e-05, + "loss": 0.311326265335083, + "step": 1389 + }, + { + "epoch": 0.687013468429507, + "grad_norm": 1.0496646406815568, + "learning_rate": 1.5660975739325755e-05, + "loss": 0.32622700929641724, + "step": 1390 + }, + { + "epoch": 0.6875077227233412, + "grad_norm": 1.1530479303397307, + "learning_rate": 1.565423851897524e-05, + "loss": 0.36029747128486633, + "step": 1391 + }, + { + "epoch": 0.6880019770171754, + "grad_norm": 0.9691306195768644, + "learning_rate": 1.5647497523935883e-05, + "loss": 0.2771177291870117, + "step": 1392 + }, + { + "epoch": 0.6884962313110096, + "grad_norm": 1.1450942478438548, + "learning_rate": 1.5640752758707868e-05, + "loss": 0.3474002182483673, + "step": 1393 + }, + { + "epoch": 0.6889904856048437, + "grad_norm": 1.09850595363495, + "learning_rate": 1.563400422779391e-05, + "loss": 0.28006255626678467, + "step": 1394 + }, + { + "epoch": 0.6894847398986779, + "grad_norm": 1.0953635794573913, + "learning_rate": 1.562725193569923e-05, + "loss": 0.32151490449905396, + "step": 1395 + }, + { + "epoch": 0.6899789941925121, + "grad_norm": 1.1995785901348681, + "learning_rate": 1.5620495886931557e-05, + "loss": 0.3081187903881073, + "step": 1396 + }, + { + "epoch": 0.6904732484863463, + "grad_norm": 1.1390576796125735, + "learning_rate": 1.561373608600114e-05, + "loss": 0.3158992826938629, + "step": 1397 + }, + { + "epoch": 0.6909675027801804, + "grad_norm": 1.1783652693752096, + "learning_rate": 1.5606972537420723e-05, + "loss": 0.33790335059165955, + "step": 1398 + }, + { + "epoch": 0.6914617570740146, + "grad_norm": 1.1733705340509706, + "learning_rate": 1.5600205245705553e-05, + "loss": 0.3157292902469635, + "step": 1399 + }, + { + 
"epoch": 0.6919560113678488, + "grad_norm": 1.1674234642263648, + "learning_rate": 1.559343421537338e-05, + "loss": 0.31090572476387024, + "step": 1400 + }, + { + "epoch": 0.6924502656616829, + "grad_norm": 1.1604041250760992, + "learning_rate": 1.5586659450944443e-05, + "loss": 0.30499958992004395, + "step": 1401 + }, + { + "epoch": 0.6929445199555171, + "grad_norm": 1.0713722972416724, + "learning_rate": 1.5579880956941478e-05, + "loss": 0.3036794662475586, + "step": 1402 + }, + { + "epoch": 0.6934387742493513, + "grad_norm": 1.1543376848490539, + "learning_rate": 1.5573098737889716e-05, + "loss": 0.26514700055122375, + "step": 1403 + }, + { + "epoch": 0.6939330285431855, + "grad_norm": 1.0755683699565965, + "learning_rate": 1.5566312798316867e-05, + "loss": 0.31947457790374756, + "step": 1404 + }, + { + "epoch": 0.6944272828370196, + "grad_norm": 1.1317886658483896, + "learning_rate": 1.5559523142753124e-05, + "loss": 0.29387322068214417, + "step": 1405 + }, + { + "epoch": 0.6949215371308538, + "grad_norm": 1.117372828260635, + "learning_rate": 1.555272977573117e-05, + "loss": 0.33459946513175964, + "step": 1406 + }, + { + "epoch": 0.695415791424688, + "grad_norm": 1.2196871082649428, + "learning_rate": 1.5545932701786154e-05, + "loss": 0.31394320726394653, + "step": 1407 + }, + { + "epoch": 0.6959100457185222, + "grad_norm": 1.0669033993360486, + "learning_rate": 1.5539131925455713e-05, + "loss": 0.2891885042190552, + "step": 1408 + }, + { + "epoch": 0.6964043000123563, + "grad_norm": 1.2475463319045528, + "learning_rate": 1.5532327451279938e-05, + "loss": 0.33686599135398865, + "step": 1409 + }, + { + "epoch": 0.6968985543061905, + "grad_norm": 1.0648029492831064, + "learning_rate": 1.5525519283801405e-05, + "loss": 0.31463146209716797, + "step": 1410 + }, + { + "epoch": 0.6973928086000247, + "grad_norm": 1.226099759538899, + "learning_rate": 1.5518707427565146e-05, + "loss": 0.3598940372467041, + "step": 1411 + }, + { + "epoch": 0.6978870628938589, + 
"grad_norm": 1.149083094787804, + "learning_rate": 1.5511891887118665e-05, + "loss": 0.32980066537857056, + "step": 1412 + }, + { + "epoch": 0.698381317187693, + "grad_norm": 1.1872142618250514, + "learning_rate": 1.5505072667011915e-05, + "loss": 0.3264961242675781, + "step": 1413 + }, + { + "epoch": 0.6988755714815272, + "grad_norm": 1.0604770012284015, + "learning_rate": 1.549824977179731e-05, + "loss": 0.3355519771575928, + "step": 1414 + }, + { + "epoch": 0.6993698257753614, + "grad_norm": 1.0119765938601295, + "learning_rate": 1.5491423206029717e-05, + "loss": 0.27073174715042114, + "step": 1415 + }, + { + "epoch": 0.6998640800691956, + "grad_norm": 1.1356545279602395, + "learning_rate": 1.5484592974266456e-05, + "loss": 0.32638323307037354, + "step": 1416 + }, + { + "epoch": 0.7003583343630297, + "grad_norm": 1.192307972564017, + "learning_rate": 1.5477759081067288e-05, + "loss": 0.38844019174575806, + "step": 1417 + }, + { + "epoch": 0.700852588656864, + "grad_norm": 1.1060104448967631, + "learning_rate": 1.5470921530994426e-05, + "loss": 0.3386498689651489, + "step": 1418 + }, + { + "epoch": 0.7013468429506982, + "grad_norm": 1.113333245203903, + "learning_rate": 1.5464080328612522e-05, + "loss": 0.3304392993450165, + "step": 1419 + }, + { + "epoch": 0.7018410972445324, + "grad_norm": 1.1024158772042199, + "learning_rate": 1.545723547848866e-05, + "loss": 0.314837247133255, + "step": 1420 + }, + { + "epoch": 0.7023353515383665, + "grad_norm": 0.9888192419219921, + "learning_rate": 1.5450386985192368e-05, + "loss": 0.30135127902030945, + "step": 1421 + }, + { + "epoch": 0.7028296058322007, + "grad_norm": 1.0640354824874358, + "learning_rate": 1.5443534853295602e-05, + "loss": 0.29176798462867737, + "step": 1422 + }, + { + "epoch": 0.7033238601260349, + "grad_norm": 1.3021824252266967, + "learning_rate": 1.5436679087372746e-05, + "loss": 0.36438125371932983, + "step": 1423 + }, + { + "epoch": 0.703818114419869, + "grad_norm": 1.1147780995478658, + 
"learning_rate": 1.542981969200061e-05, + "loss": 0.37140434980392456, + "step": 1424 + }, + { + "epoch": 0.7043123687137032, + "grad_norm": 1.3176538326023695, + "learning_rate": 1.542295667175843e-05, + "loss": 0.36072903871536255, + "step": 1425 + }, + { + "epoch": 0.7048066230075374, + "grad_norm": 1.1262882885574772, + "learning_rate": 1.5416090031227868e-05, + "loss": 0.3266327977180481, + "step": 1426 + }, + { + "epoch": 0.7053008773013716, + "grad_norm": 1.0179565917308762, + "learning_rate": 1.5409219774992978e-05, + "loss": 0.3081423342227936, + "step": 1427 + }, + { + "epoch": 0.7057951315952057, + "grad_norm": 1.3034313694807904, + "learning_rate": 1.5402345907640262e-05, + "loss": 0.3571197986602783, + "step": 1428 + }, + { + "epoch": 0.7062893858890399, + "grad_norm": 1.1385888315844002, + "learning_rate": 1.5395468433758604e-05, + "loss": 0.32380104064941406, + "step": 1429 + }, + { + "epoch": 0.7067836401828741, + "grad_norm": 1.0129718670355197, + "learning_rate": 1.5388587357939313e-05, + "loss": 0.33777546882629395, + "step": 1430 + }, + { + "epoch": 0.7072778944767083, + "grad_norm": 1.0997780610685683, + "learning_rate": 1.5381702684776093e-05, + "loss": 0.31793370842933655, + "step": 1431 + }, + { + "epoch": 0.7077721487705424, + "grad_norm": 1.065324744616134, + "learning_rate": 1.537481441886506e-05, + "loss": 0.3282355070114136, + "step": 1432 + }, + { + "epoch": 0.7082664030643766, + "grad_norm": 1.1740655706878367, + "learning_rate": 1.5367922564804716e-05, + "loss": 0.3523057699203491, + "step": 1433 + }, + { + "epoch": 0.7087606573582108, + "grad_norm": 1.1790295388685894, + "learning_rate": 1.5361027127195964e-05, + "loss": 0.36351460218429565, + "step": 1434 + }, + { + "epoch": 0.709254911652045, + "grad_norm": 2.2339320260763373, + "learning_rate": 1.5354128110642102e-05, + "loss": 0.2936401963233948, + "step": 1435 + }, + { + "epoch": 0.7097491659458791, + "grad_norm": 1.1080576186798932, + "learning_rate": 1.5347225519748818e-05, + 
"loss": 0.3178175091743469, + "step": 1436 + }, + { + "epoch": 0.7102434202397133, + "grad_norm": 1.1375761171495609, + "learning_rate": 1.5340319359124177e-05, + "loss": 0.3098832666873932, + "step": 1437 + }, + { + "epoch": 0.7107376745335475, + "grad_norm": 0.951807024133746, + "learning_rate": 1.5333409633378633e-05, + "loss": 0.2644941806793213, + "step": 1438 + }, + { + "epoch": 0.7112319288273817, + "grad_norm": 1.1193499530101132, + "learning_rate": 1.5326496347125027e-05, + "loss": 0.3046286702156067, + "step": 1439 + }, + { + "epoch": 0.7117261831212158, + "grad_norm": 1.1009971048909013, + "learning_rate": 1.5319579504978567e-05, + "loss": 0.33757925033569336, + "step": 1440 + }, + { + "epoch": 0.71222043741505, + "grad_norm": 1.1415644120008137, + "learning_rate": 1.5312659111556832e-05, + "loss": 0.3470202684402466, + "step": 1441 + }, + { + "epoch": 0.7127146917088842, + "grad_norm": 1.0829483976260892, + "learning_rate": 1.5305735171479785e-05, + "loss": 0.3310868740081787, + "step": 1442 + }, + { + "epoch": 0.7132089460027184, + "grad_norm": 1.2738694792524405, + "learning_rate": 1.529880768936975e-05, + "loss": 0.31649407744407654, + "step": 1443 + }, + { + "epoch": 0.7137032002965525, + "grad_norm": 1.0510301649062292, + "learning_rate": 1.5291876669851408e-05, + "loss": 0.2986135184764862, + "step": 1444 + }, + { + "epoch": 0.7141974545903867, + "grad_norm": 1.1622525691797543, + "learning_rate": 1.5284942117551817e-05, + "loss": 0.3033408224582672, + "step": 1445 + }, + { + "epoch": 0.714691708884221, + "grad_norm": 1.1648719329133883, + "learning_rate": 1.5278004037100378e-05, + "loss": 0.34231680631637573, + "step": 1446 + }, + { + "epoch": 0.7151859631780552, + "grad_norm": 1.1347301204641653, + "learning_rate": 1.5271062433128857e-05, + "loss": 0.3273579478263855, + "step": 1447 + }, + { + "epoch": 0.7156802174718893, + "grad_norm": 1.2307292916383785, + "learning_rate": 1.5264117310271372e-05, + "loss": 0.344064861536026, + "step": 1448 + 
}, + { + "epoch": 0.7161744717657235, + "grad_norm": 1.0685505855741966, + "learning_rate": 1.5257168673164384e-05, + "loss": 0.3131038546562195, + "step": 1449 + }, + { + "epoch": 0.7166687260595577, + "grad_norm": 1.1403948273488542, + "learning_rate": 1.5250216526446708e-05, + "loss": 0.32794755697250366, + "step": 1450 + }, + { + "epoch": 0.7171629803533918, + "grad_norm": 1.2597097116316462, + "learning_rate": 1.5243260874759494e-05, + "loss": 0.3633842468261719, + "step": 1451 + }, + { + "epoch": 0.717657234647226, + "grad_norm": 0.943013995379639, + "learning_rate": 1.5236301722746235e-05, + "loss": 0.24650251865386963, + "step": 1452 + }, + { + "epoch": 0.7181514889410602, + "grad_norm": 1.1777840335640666, + "learning_rate": 1.5229339075052769e-05, + "loss": 0.34167230129241943, + "step": 1453 + }, + { + "epoch": 0.7186457432348944, + "grad_norm": 1.0945051908887762, + "learning_rate": 1.522237293632725e-05, + "loss": 0.29454126954078674, + "step": 1454 + }, + { + "epoch": 0.7191399975287285, + "grad_norm": 1.1517995676673816, + "learning_rate": 1.5215403311220178e-05, + "loss": 0.3709314465522766, + "step": 1455 + }, + { + "epoch": 0.7196342518225627, + "grad_norm": 1.1421076533752808, + "learning_rate": 1.5208430204384377e-05, + "loss": 0.3543916642665863, + "step": 1456 + }, + { + "epoch": 0.7201285061163969, + "grad_norm": 1.1924648010793302, + "learning_rate": 1.5201453620474986e-05, + "loss": 0.33827707171440125, + "step": 1457 + }, + { + "epoch": 0.7206227604102311, + "grad_norm": 1.1616070041381745, + "learning_rate": 1.5194473564149484e-05, + "loss": 0.31289514899253845, + "step": 1458 + }, + { + "epoch": 0.7211170147040652, + "grad_norm": 1.1655875507968474, + "learning_rate": 1.5187490040067646e-05, + "loss": 0.3345657289028168, + "step": 1459 + }, + { + "epoch": 0.7216112689978994, + "grad_norm": 1.091971369166992, + "learning_rate": 1.5180503052891578e-05, + "loss": 0.3322404623031616, + "step": 1460 + }, + { + "epoch": 0.7221055232917336, + 
"grad_norm": 1.0009476128919939, + "learning_rate": 1.5173512607285692e-05, + "loss": 0.31120461225509644, + "step": 1461 + }, + { + "epoch": 0.7225997775855678, + "grad_norm": 1.140979323325151, + "learning_rate": 1.5166518707916714e-05, + "loss": 0.3388645648956299, + "step": 1462 + }, + { + "epoch": 0.7230940318794019, + "grad_norm": 1.098469502784105, + "learning_rate": 1.5159521359453661e-05, + "loss": 0.3048557639122009, + "step": 1463 + }, + { + "epoch": 0.7235882861732361, + "grad_norm": 1.0437743408474436, + "learning_rate": 1.5152520566567873e-05, + "loss": 0.32128047943115234, + "step": 1464 + }, + { + "epoch": 0.7240825404670703, + "grad_norm": 1.0754519434907805, + "learning_rate": 1.5145516333932973e-05, + "loss": 0.3016900420188904, + "step": 1465 + }, + { + "epoch": 0.7245767947609045, + "grad_norm": 0.9730419604339762, + "learning_rate": 1.5138508666224892e-05, + "loss": 0.27410340309143066, + "step": 1466 + }, + { + "epoch": 0.7250710490547386, + "grad_norm": 1.1548137674896846, + "learning_rate": 1.513149756812184e-05, + "loss": 0.314311146736145, + "step": 1467 + }, + { + "epoch": 0.7255653033485728, + "grad_norm": 1.0652992161056178, + "learning_rate": 1.5124483044304339e-05, + "loss": 0.300488144159317, + "step": 1468 + }, + { + "epoch": 0.726059557642407, + "grad_norm": 1.0437811199768454, + "learning_rate": 1.5117465099455173e-05, + "loss": 0.2610424757003784, + "step": 1469 + }, + { + "epoch": 0.7265538119362412, + "grad_norm": 1.0473843452456588, + "learning_rate": 1.5110443738259425e-05, + "loss": 0.2631368637084961, + "step": 1470 + }, + { + "epoch": 0.7270480662300753, + "grad_norm": 1.1572872923696271, + "learning_rate": 1.510341896540446e-05, + "loss": 0.2894716262817383, + "step": 1471 + }, + { + "epoch": 0.7275423205239095, + "grad_norm": 1.1539682565039295, + "learning_rate": 1.5096390785579913e-05, + "loss": 0.2859206199645996, + "step": 1472 + }, + { + "epoch": 0.7280365748177438, + "grad_norm": 1.1861776477785995, + 
"learning_rate": 1.5089359203477693e-05, + "loss": 0.2966008484363556, + "step": 1473 + }, + { + "epoch": 0.728530829111578, + "grad_norm": 1.0911088494470613, + "learning_rate": 1.5082324223791988e-05, + "loss": 0.3187675476074219, + "step": 1474 + }, + { + "epoch": 0.729025083405412, + "grad_norm": 1.1920802680772398, + "learning_rate": 1.507528585121925e-05, + "loss": 0.32434171438217163, + "step": 1475 + }, + { + "epoch": 0.7295193376992463, + "grad_norm": 1.233732485912319, + "learning_rate": 1.5068244090458197e-05, + "loss": 0.3518364429473877, + "step": 1476 + }, + { + "epoch": 0.7300135919930805, + "grad_norm": 1.091189612496036, + "learning_rate": 1.50611989462098e-05, + "loss": 0.32294291257858276, + "step": 1477 + }, + { + "epoch": 0.7305078462869146, + "grad_norm": 1.184027940449126, + "learning_rate": 1.5054150423177307e-05, + "loss": 0.3413415253162384, + "step": 1478 + }, + { + "epoch": 0.7310021005807488, + "grad_norm": 1.1760745568840743, + "learning_rate": 1.5047098526066207e-05, + "loss": 0.3562566637992859, + "step": 1479 + }, + { + "epoch": 0.731496354874583, + "grad_norm": 1.130494844464842, + "learning_rate": 1.504004325958424e-05, + "loss": 0.30018410086631775, + "step": 1480 + }, + { + "epoch": 0.7319906091684172, + "grad_norm": 1.027268124102698, + "learning_rate": 1.5032984628441409e-05, + "loss": 0.2937701344490051, + "step": 1481 + }, + { + "epoch": 0.7324848634622513, + "grad_norm": 1.131154387943882, + "learning_rate": 1.5025922637349953e-05, + "loss": 0.3268740773200989, + "step": 1482 + }, + { + "epoch": 0.7329791177560855, + "grad_norm": 1.053089747814938, + "learning_rate": 1.5018857291024356e-05, + "loss": 0.3246314525604248, + "step": 1483 + }, + { + "epoch": 0.7334733720499197, + "grad_norm": 1.033026683314433, + "learning_rate": 1.501178859418134e-05, + "loss": 0.276904433965683, + "step": 1484 + }, + { + "epoch": 0.7339676263437539, + "grad_norm": 1.1901915790154476, + "learning_rate": 1.5004716551539873e-05, + "loss": 
0.27665287256240845, + "step": 1485 + }, + { + "epoch": 0.734461880637588, + "grad_norm": 1.065690181516995, + "learning_rate": 1.4997641167821143e-05, + "loss": 0.325985848903656, + "step": 1486 + }, + { + "epoch": 0.7349561349314222, + "grad_norm": 1.2333398180696593, + "learning_rate": 1.4990562447748573e-05, + "loss": 0.2951817214488983, + "step": 1487 + }, + { + "epoch": 0.7354503892252564, + "grad_norm": 1.0415622998394476, + "learning_rate": 1.4983480396047822e-05, + "loss": 0.2592772841453552, + "step": 1488 + }, + { + "epoch": 0.7359446435190906, + "grad_norm": 1.0977128928049222, + "learning_rate": 1.4976395017446767e-05, + "loss": 0.3278253674507141, + "step": 1489 + }, + { + "epoch": 0.7364388978129247, + "grad_norm": 2.4840016288238886, + "learning_rate": 1.4969306316675497e-05, + "loss": 0.32366445660591125, + "step": 1490 + }, + { + "epoch": 0.7369331521067589, + "grad_norm": 1.065618785924185, + "learning_rate": 1.4962214298466337e-05, + "loss": 0.30544513463974, + "step": 1491 + }, + { + "epoch": 0.7374274064005931, + "grad_norm": 1.1151764286390358, + "learning_rate": 1.4955118967553812e-05, + "loss": 0.3712898790836334, + "step": 1492 + }, + { + "epoch": 0.7379216606944273, + "grad_norm": 1.072095940180716, + "learning_rate": 1.4948020328674662e-05, + "loss": 0.3006438612937927, + "step": 1493 + }, + { + "epoch": 0.7384159149882614, + "grad_norm": 1.1145573413296936, + "learning_rate": 1.494091838656784e-05, + "loss": 0.3494953215122223, + "step": 1494 + }, + { + "epoch": 0.7389101692820956, + "grad_norm": 1.091824613740768, + "learning_rate": 1.4933813145974504e-05, + "loss": 0.2698785662651062, + "step": 1495 + }, + { + "epoch": 0.7394044235759298, + "grad_norm": 1.1072713673032075, + "learning_rate": 1.4926704611638003e-05, + "loss": 0.34775635600090027, + "step": 1496 + }, + { + "epoch": 0.739898677869764, + "grad_norm": 1.1542085278706422, + "learning_rate": 1.4919592788303898e-05, + "loss": 0.328175812959671, + "step": 1497 + }, + { + 
"epoch": 0.7403929321635981, + "grad_norm": 1.1735161292651393, + "learning_rate": 1.491247768071994e-05, + "loss": 0.3320178687572479, + "step": 1498 + }, + { + "epoch": 0.7408871864574323, + "grad_norm": 1.8687355330582882, + "learning_rate": 1.4905359293636074e-05, + "loss": 0.308150053024292, + "step": 1499 + }, + { + "epoch": 0.7413814407512666, + "grad_norm": 1.1422704685641505, + "learning_rate": 1.489823763180443e-05, + "loss": 0.3311570882797241, + "step": 1500 + }, + { + "epoch": 0.7418756950451008, + "grad_norm": 1.2844910379105308, + "learning_rate": 1.4891112699979334e-05, + "loss": 0.36916327476501465, + "step": 1501 + }, + { + "epoch": 0.7423699493389349, + "grad_norm": 1.0354244070195735, + "learning_rate": 1.4883984502917286e-05, + "loss": 0.28005337715148926, + "step": 1502 + }, + { + "epoch": 0.7428642036327691, + "grad_norm": 1.2241818166146565, + "learning_rate": 1.4876853045376962e-05, + "loss": 0.3502781391143799, + "step": 1503 + }, + { + "epoch": 0.7433584579266033, + "grad_norm": 1.2448349850537428, + "learning_rate": 1.4869718332119232e-05, + "loss": 0.32032880187034607, + "step": 1504 + }, + { + "epoch": 0.7438527122204374, + "grad_norm": 1.1236679189592251, + "learning_rate": 1.4862580367907118e-05, + "loss": 0.3229472041130066, + "step": 1505 + }, + { + "epoch": 0.7443469665142716, + "grad_norm": 1.087360074547477, + "learning_rate": 1.4855439157505833e-05, + "loss": 0.2725368142127991, + "step": 1506 + }, + { + "epoch": 0.7448412208081058, + "grad_norm": 1.2509876854452482, + "learning_rate": 1.4848294705682737e-05, + "loss": 0.35358861088752747, + "step": 1507 + }, + { + "epoch": 0.74533547510194, + "grad_norm": 1.0843196708603702, + "learning_rate": 1.4841147017207376e-05, + "loss": 0.299206018447876, + "step": 1508 + }, + { + "epoch": 0.7458297293957741, + "grad_norm": 2.7618594064377384, + "learning_rate": 1.4833996096851432e-05, + "loss": 0.32004314661026, + "step": 1509 + }, + { + "epoch": 0.7463239836896083, + "grad_norm": 
1.1399779760270892, + "learning_rate": 1.4826841949388767e-05, + "loss": 0.32800590991973877, + "step": 1510 + }, + { + "epoch": 0.7468182379834425, + "grad_norm": 1.112132363505793, + "learning_rate": 1.4819684579595382e-05, + "loss": 0.2916460335254669, + "step": 1511 + }, + { + "epoch": 0.7473124922772767, + "grad_norm": 1.2041472096070427, + "learning_rate": 1.4812523992249437e-05, + "loss": 0.3276118338108063, + "step": 1512 + }, + { + "epoch": 0.7478067465711108, + "grad_norm": 1.2310079375510266, + "learning_rate": 1.4805360192131234e-05, + "loss": 0.34718069434165955, + "step": 1513 + }, + { + "epoch": 0.748301000864945, + "grad_norm": 1.0130113878676084, + "learning_rate": 1.4798193184023233e-05, + "loss": 0.2810167372226715, + "step": 1514 + }, + { + "epoch": 0.7487952551587792, + "grad_norm": 1.1600230287701154, + "learning_rate": 1.4791022972710017e-05, + "loss": 0.3542296886444092, + "step": 1515 + }, + { + "epoch": 0.7492895094526134, + "grad_norm": 1.0717623685966582, + "learning_rate": 1.4783849562978319e-05, + "loss": 0.27578431367874146, + "step": 1516 + }, + { + "epoch": 0.7497837637464475, + "grad_norm": 1.2193919844014014, + "learning_rate": 1.4776672959617006e-05, + "loss": 0.32235798239707947, + "step": 1517 + }, + { + "epoch": 0.7502780180402817, + "grad_norm": 1.073591922439447, + "learning_rate": 1.4769493167417079e-05, + "loss": 0.30588477849960327, + "step": 1518 + }, + { + "epoch": 0.7507722723341159, + "grad_norm": 1.1259837125407774, + "learning_rate": 1.4762310191171657e-05, + "loss": 0.31242361664772034, + "step": 1519 + }, + { + "epoch": 0.7512665266279501, + "grad_norm": 1.2265290610094162, + "learning_rate": 1.4755124035675995e-05, + "loss": 0.3679526150226593, + "step": 1520 + }, + { + "epoch": 0.7517607809217842, + "grad_norm": 1.0185674037419847, + "learning_rate": 1.4747934705727473e-05, + "loss": 0.28588515520095825, + "step": 1521 + }, + { + "epoch": 0.7522550352156184, + "grad_norm": 1.0624456882482982, + "learning_rate": 
1.4740742206125582e-05, + "loss": 0.29861775040626526, + "step": 1522 + }, + { + "epoch": 0.7527492895094526, + "grad_norm": 1.1245071890104912, + "learning_rate": 1.4733546541671928e-05, + "loss": 0.31373754143714905, + "step": 1523 + }, + { + "epoch": 0.7532435438032868, + "grad_norm": 1.1569601569555032, + "learning_rate": 1.472634771717024e-05, + "loss": 0.3127061128616333, + "step": 1524 + }, + { + "epoch": 0.7537377980971209, + "grad_norm": 1.0554556810771654, + "learning_rate": 1.4719145737426346e-05, + "loss": 0.33681541681289673, + "step": 1525 + }, + { + "epoch": 0.7542320523909551, + "grad_norm": 1.1202634511050926, + "learning_rate": 1.4711940607248182e-05, + "loss": 0.30266639590263367, + "step": 1526 + }, + { + "epoch": 0.7547263066847893, + "grad_norm": 1.0915134711866425, + "learning_rate": 1.47047323314458e-05, + "loss": 0.2988300323486328, + "step": 1527 + }, + { + "epoch": 0.7552205609786236, + "grad_norm": 1.1041853232471737, + "learning_rate": 1.4697520914831334e-05, + "loss": 0.32679620385169983, + "step": 1528 + }, + { + "epoch": 0.7557148152724577, + "grad_norm": 1.0049846597819565, + "learning_rate": 1.4690306362219024e-05, + "loss": 0.2935605049133301, + "step": 1529 + }, + { + "epoch": 0.7562090695662919, + "grad_norm": 1.1114952379308272, + "learning_rate": 1.4683088678425204e-05, + "loss": 0.303417831659317, + "step": 1530 + }, + { + "epoch": 0.7567033238601261, + "grad_norm": 1.0605597139601082, + "learning_rate": 1.4675867868268295e-05, + "loss": 0.30822527408599854, + "step": 1531 + }, + { + "epoch": 0.7571975781539602, + "grad_norm": 1.0772522309630048, + "learning_rate": 1.4668643936568807e-05, + "loss": 0.3104674220085144, + "step": 1532 + }, + { + "epoch": 0.7576918324477944, + "grad_norm": 1.0598818436947175, + "learning_rate": 1.4661416888149333e-05, + "loss": 0.27899307012557983, + "step": 1533 + }, + { + "epoch": 0.7581860867416286, + "grad_norm": 1.1291791785743877, + "learning_rate": 1.465418672783455e-05, + "loss": 
0.3285380005836487, + "step": 1534 + }, + { + "epoch": 0.7586803410354628, + "grad_norm": 1.0773746767557166, + "learning_rate": 1.4646953460451205e-05, + "loss": 0.32028889656066895, + "step": 1535 + }, + { + "epoch": 0.7591745953292969, + "grad_norm": 1.2647242329167074, + "learning_rate": 1.4639717090828127e-05, + "loss": 0.29870709776878357, + "step": 1536 + }, + { + "epoch": 0.7596688496231311, + "grad_norm": 1.1833149129368068, + "learning_rate": 1.4632477623796216e-05, + "loss": 0.3556699752807617, + "step": 1537 + }, + { + "epoch": 0.7601631039169653, + "grad_norm": 1.158341046754784, + "learning_rate": 1.462523506418843e-05, + "loss": 0.3433789014816284, + "step": 1538 + }, + { + "epoch": 0.7606573582107995, + "grad_norm": 1.2526530715160118, + "learning_rate": 1.4617989416839802e-05, + "loss": 0.3146114945411682, + "step": 1539 + }, + { + "epoch": 0.7611516125046336, + "grad_norm": 1.0254798742920868, + "learning_rate": 1.4610740686587424e-05, + "loss": 0.29029202461242676, + "step": 1540 + }, + { + "epoch": 0.7616458667984678, + "grad_norm": 1.2367212561484746, + "learning_rate": 1.4603488878270442e-05, + "loss": 0.2976688742637634, + "step": 1541 + }, + { + "epoch": 0.762140121092302, + "grad_norm": 1.0384933941129642, + "learning_rate": 1.459623399673006e-05, + "loss": 0.28604352474212646, + "step": 1542 + }, + { + "epoch": 0.7626343753861362, + "grad_norm": 1.2070329997652125, + "learning_rate": 1.4588976046809536e-05, + "loss": 0.34977301955223083, + "step": 1543 + }, + { + "epoch": 0.7631286296799703, + "grad_norm": 1.1108699937366455, + "learning_rate": 1.458171503335417e-05, + "loss": 0.31592974066734314, + "step": 1544 + }, + { + "epoch": 0.7636228839738045, + "grad_norm": 1.224645404968216, + "learning_rate": 1.4574450961211312e-05, + "loss": 0.31539830565452576, + "step": 1545 + }, + { + "epoch": 0.7641171382676387, + "grad_norm": 1.1914008033212045, + "learning_rate": 1.4567183835230355e-05, + "loss": 0.3100752532482147, + "step": 1546 + }, + 
{ + "epoch": 0.7646113925614729, + "grad_norm": 1.1973069016485758, + "learning_rate": 1.4559913660262726e-05, + "loss": 0.31005364656448364, + "step": 1547 + }, + { + "epoch": 0.765105646855307, + "grad_norm": 1.102020410207535, + "learning_rate": 1.4552640441161889e-05, + "loss": 0.3050577640533447, + "step": 1548 + }, + { + "epoch": 0.7655999011491412, + "grad_norm": 1.1151715417212549, + "learning_rate": 1.4545364182783343e-05, + "loss": 0.294721394777298, + "step": 1549 + }, + { + "epoch": 0.7660941554429754, + "grad_norm": 1.0907882805879732, + "learning_rate": 1.4538084889984616e-05, + "loss": 0.2974075376987457, + "step": 1550 + }, + { + "epoch": 0.7665884097368096, + "grad_norm": 1.218483256285566, + "learning_rate": 1.4530802567625259e-05, + "loss": 0.3247089385986328, + "step": 1551 + }, + { + "epoch": 0.7670826640306437, + "grad_norm": 1.1751647980540385, + "learning_rate": 1.4523517220566843e-05, + "loss": 0.3219151198863983, + "step": 1552 + }, + { + "epoch": 0.7675769183244779, + "grad_norm": 1.092743254793347, + "learning_rate": 1.4516228853672962e-05, + "loss": 0.30580246448516846, + "step": 1553 + }, + { + "epoch": 0.7680711726183121, + "grad_norm": 1.0670412493946726, + "learning_rate": 1.4508937471809233e-05, + "loss": 0.2983207702636719, + "step": 1554 + }, + { + "epoch": 0.7685654269121464, + "grad_norm": 1.1155603134808716, + "learning_rate": 1.4501643079843266e-05, + "loss": 0.3429039418697357, + "step": 1555 + }, + { + "epoch": 0.7690596812059804, + "grad_norm": 1.0600298870014666, + "learning_rate": 1.4494345682644704e-05, + "loss": 0.3055192530155182, + "step": 1556 + }, + { + "epoch": 0.7695539354998147, + "grad_norm": 1.0843598968647987, + "learning_rate": 1.4487045285085178e-05, + "loss": 0.2964102327823639, + "step": 1557 + }, + { + "epoch": 0.7700481897936489, + "grad_norm": 1.0436581793993642, + "learning_rate": 1.4479741892038335e-05, + "loss": 0.3088444471359253, + "step": 1558 + }, + { + "epoch": 0.770542444087483, + "grad_norm": 
1.070280126063037, + "learning_rate": 1.4472435508379808e-05, + "loss": 0.28697890043258667, + "step": 1559 + }, + { + "epoch": 0.7710366983813172, + "grad_norm": 1.1055317673748768, + "learning_rate": 1.4465126138987242e-05, + "loss": 0.3664681315422058, + "step": 1560 + }, + { + "epoch": 0.7715309526751514, + "grad_norm": 1.1042702127280148, + "learning_rate": 1.4457813788740263e-05, + "loss": 0.3282932937145233, + "step": 1561 + }, + { + "epoch": 0.7720252069689856, + "grad_norm": 1.4857133307558297, + "learning_rate": 1.4450498462520495e-05, + "loss": 0.27597576379776, + "step": 1562 + }, + { + "epoch": 0.7725194612628197, + "grad_norm": 1.2214452597170176, + "learning_rate": 1.4443180165211541e-05, + "loss": 0.3553946614265442, + "step": 1563 + }, + { + "epoch": 0.7730137155566539, + "grad_norm": 1.1827716129984904, + "learning_rate": 1.4435858901698995e-05, + "loss": 0.36224859952926636, + "step": 1564 + }, + { + "epoch": 0.7735079698504881, + "grad_norm": 1.1341836510498036, + "learning_rate": 1.4428534676870427e-05, + "loss": 0.2940914034843445, + "step": 1565 + }, + { + "epoch": 0.7740022241443223, + "grad_norm": 0.9563512574257287, + "learning_rate": 1.4421207495615385e-05, + "loss": 0.2717741131782532, + "step": 1566 + }, + { + "epoch": 0.7744964784381564, + "grad_norm": 1.1327871067959112, + "learning_rate": 1.441387736282539e-05, + "loss": 0.32340431213378906, + "step": 1567 + }, + { + "epoch": 0.7749907327319906, + "grad_norm": 1.1090264087970254, + "learning_rate": 1.4406544283393935e-05, + "loss": 0.3080120086669922, + "step": 1568 + }, + { + "epoch": 0.7754849870258248, + "grad_norm": 1.4441577426158039, + "learning_rate": 1.4399208262216475e-05, + "loss": 0.3118380308151245, + "step": 1569 + }, + { + "epoch": 0.775979241319659, + "grad_norm": 1.3307213271784917, + "learning_rate": 1.439186930419044e-05, + "loss": 0.3086084723472595, + "step": 1570 + }, + { + "epoch": 0.7764734956134931, + "grad_norm": 1.1593176371811458, + "learning_rate": 
1.438452741421521e-05, + "loss": 0.3233364522457123, + "step": 1571 + }, + { + "epoch": 0.7769677499073273, + "grad_norm": 1.0623874748102813, + "learning_rate": 1.4377182597192124e-05, + "loss": 0.29029640555381775, + "step": 1572 + }, + { + "epoch": 0.7774620042011615, + "grad_norm": 0.9791711244739897, + "learning_rate": 1.4369834858024476e-05, + "loss": 0.2888006567955017, + "step": 1573 + }, + { + "epoch": 0.7779562584949957, + "grad_norm": 1.1118016172702438, + "learning_rate": 1.4362484201617519e-05, + "loss": 0.3260151743888855, + "step": 1574 + }, + { + "epoch": 0.7784505127888298, + "grad_norm": 1.3306536044832058, + "learning_rate": 1.4355130632878439e-05, + "loss": 0.333207905292511, + "step": 1575 + }, + { + "epoch": 0.778944767082664, + "grad_norm": 1.0844273121477916, + "learning_rate": 1.4347774156716375e-05, + "loss": 0.2577935457229614, + "step": 1576 + }, + { + "epoch": 0.7794390213764982, + "grad_norm": 1.0777103823564191, + "learning_rate": 1.434041477804241e-05, + "loss": 0.29645979404449463, + "step": 1577 + }, + { + "epoch": 0.7799332756703324, + "grad_norm": 1.1743796307407597, + "learning_rate": 1.433305250176955e-05, + "loss": 0.2973156273365021, + "step": 1578 + }, + { + "epoch": 0.7804275299641665, + "grad_norm": 1.0277241805983874, + "learning_rate": 1.4325687332812754e-05, + "loss": 0.29159975051879883, + "step": 1579 + }, + { + "epoch": 0.7809217842580007, + "grad_norm": 1.1751334806332727, + "learning_rate": 1.4318319276088902e-05, + "loss": 0.29718664288520813, + "step": 1580 + }, + { + "epoch": 0.781416038551835, + "grad_norm": 1.316577919508971, + "learning_rate": 1.4310948336516803e-05, + "loss": 0.3262369632720947, + "step": 1581 + }, + { + "epoch": 0.781910292845669, + "grad_norm": 1.182680350644687, + "learning_rate": 1.4303574519017187e-05, + "loss": 0.36491623520851135, + "step": 1582 + }, + { + "epoch": 0.7824045471395032, + "grad_norm": 1.181580153295467, + "learning_rate": 1.4296197828512716e-05, + "loss": 
0.3558582365512848, + "step": 1583 + }, + { + "epoch": 0.7828988014333375, + "grad_norm": 0.9802630700834107, + "learning_rate": 1.428881826992796e-05, + "loss": 0.2745930552482605, + "step": 1584 + }, + { + "epoch": 0.7833930557271717, + "grad_norm": 1.1668091765691224, + "learning_rate": 1.4281435848189404e-05, + "loss": 0.3239384889602661, + "step": 1585 + }, + { + "epoch": 0.7838873100210058, + "grad_norm": 1.0164738185404556, + "learning_rate": 1.4274050568225452e-05, + "loss": 0.2708761692047119, + "step": 1586 + }, + { + "epoch": 0.78438156431484, + "grad_norm": 1.2356501028179845, + "learning_rate": 1.4266662434966412e-05, + "loss": 0.3633013963699341, + "step": 1587 + }, + { + "epoch": 0.7848758186086742, + "grad_norm": 1.2145151160613337, + "learning_rate": 1.425927145334449e-05, + "loss": 0.36411651968955994, + "step": 1588 + }, + { + "epoch": 0.7853700729025084, + "grad_norm": 1.2093753197442545, + "learning_rate": 1.4251877628293804e-05, + "loss": 0.3120966851711273, + "step": 1589 + }, + { + "epoch": 0.7858643271963425, + "grad_norm": 1.111474907013162, + "learning_rate": 1.4244480964750365e-05, + "loss": 0.32788634300231934, + "step": 1590 + }, + { + "epoch": 0.7863585814901767, + "grad_norm": 1.1320230499507122, + "learning_rate": 1.423708146765208e-05, + "loss": 0.2919159233570099, + "step": 1591 + }, + { + "epoch": 0.7868528357840109, + "grad_norm": 1.1271090926469096, + "learning_rate": 1.4229679141938749e-05, + "loss": 0.3135683834552765, + "step": 1592 + }, + { + "epoch": 0.7873470900778451, + "grad_norm": 1.2447784007425877, + "learning_rate": 1.4222273992552058e-05, + "loss": 0.351981520652771, + "step": 1593 + }, + { + "epoch": 0.7878413443716792, + "grad_norm": 1.1846979202846248, + "learning_rate": 1.4214866024435576e-05, + "loss": 0.3615785837173462, + "step": 1594 + }, + { + "epoch": 0.7883355986655134, + "grad_norm": 1.1632616021817466, + "learning_rate": 1.420745524253476e-05, + "loss": 0.29399484395980835, + "step": 1595 + }, + { + 
"epoch": 0.7888298529593476, + "grad_norm": 1.1714512606078011, + "learning_rate": 1.420004165179694e-05, + "loss": 0.30501872301101685, + "step": 1596 + }, + { + "epoch": 0.7893241072531818, + "grad_norm": 1.1172632404953093, + "learning_rate": 1.4192625257171331e-05, + "loss": 0.33745667338371277, + "step": 1597 + }, + { + "epoch": 0.7898183615470159, + "grad_norm": 0.994693525988225, + "learning_rate": 1.4185206063609e-05, + "loss": 0.2675662934780121, + "step": 1598 + }, + { + "epoch": 0.7903126158408501, + "grad_norm": 1.022107075414073, + "learning_rate": 1.41777840760629e-05, + "loss": 0.295659601688385, + "step": 1599 + }, + { + "epoch": 0.7908068701346843, + "grad_norm": 1.119079517603524, + "learning_rate": 1.4170359299487848e-05, + "loss": 0.3164275586605072, + "step": 1600 + }, + { + "epoch": 0.7913011244285185, + "grad_norm": 1.0695885495482724, + "learning_rate": 1.416293173884051e-05, + "loss": 0.3039100766181946, + "step": 1601 + }, + { + "epoch": 0.7917953787223526, + "grad_norm": 1.1080665801372258, + "learning_rate": 1.4155501399079427e-05, + "loss": 0.2994040846824646, + "step": 1602 + }, + { + "epoch": 0.7922896330161868, + "grad_norm": 1.3291271745996591, + "learning_rate": 1.4148068285164984e-05, + "loss": 0.3129369616508484, + "step": 1603 + }, + { + "epoch": 0.792783887310021, + "grad_norm": 1.084724718149673, + "learning_rate": 1.4140632402059424e-05, + "loss": 0.3223167657852173, + "step": 1604 + }, + { + "epoch": 0.7932781416038552, + "grad_norm": 1.0882285752839331, + "learning_rate": 1.4133193754726834e-05, + "loss": 0.2734811305999756, + "step": 1605 + }, + { + "epoch": 0.7937723958976893, + "grad_norm": 1.103029405529104, + "learning_rate": 1.4125752348133148e-05, + "loss": 0.27474087476730347, + "step": 1606 + }, + { + "epoch": 0.7942666501915235, + "grad_norm": 1.0487344928171054, + "learning_rate": 1.4118308187246145e-05, + "loss": 0.2619907557964325, + "step": 1607 + }, + { + "epoch": 0.7947609044853577, + "grad_norm": 
1.1981880636142406, + "learning_rate": 1.411086127703544e-05, + "loss": 0.3176937699317932, + "step": 1608 + }, + { + "epoch": 0.7952551587791918, + "grad_norm": 1.12323060393325, + "learning_rate": 1.4103411622472483e-05, + "loss": 0.28044235706329346, + "step": 1609 + }, + { + "epoch": 0.795749413073026, + "grad_norm": 1.0360499320558048, + "learning_rate": 1.409595922853056e-05, + "loss": 0.27778196334838867, + "step": 1610 + }, + { + "epoch": 0.7962436673668603, + "grad_norm": 1.2703211339383462, + "learning_rate": 1.4088504100184777e-05, + "loss": 0.3168628513813019, + "step": 1611 + }, + { + "epoch": 0.7967379216606945, + "grad_norm": 1.1557608708585085, + "learning_rate": 1.4081046242412075e-05, + "loss": 0.30454084277153015, + "step": 1612 + }, + { + "epoch": 0.7972321759545286, + "grad_norm": 1.169123128871501, + "learning_rate": 1.4073585660191214e-05, + "loss": 0.34019169211387634, + "step": 1613 + }, + { + "epoch": 0.7977264302483628, + "grad_norm": 1.173717391982327, + "learning_rate": 1.4066122358502772e-05, + "loss": 0.3044774830341339, + "step": 1614 + }, + { + "epoch": 0.798220684542197, + "grad_norm": 1.1570346377203322, + "learning_rate": 1.4058656342329136e-05, + "loss": 0.3181847333908081, + "step": 1615 + }, + { + "epoch": 0.7987149388360312, + "grad_norm": 1.249158616205248, + "learning_rate": 1.405118761665452e-05, + "loss": 0.3400845229625702, + "step": 1616 + }, + { + "epoch": 0.7992091931298653, + "grad_norm": 1.2103435711338524, + "learning_rate": 1.4043716186464935e-05, + "loss": 0.2845221161842346, + "step": 1617 + }, + { + "epoch": 0.7997034474236995, + "grad_norm": 1.060854004382088, + "learning_rate": 1.4036242056748202e-05, + "loss": 0.27315276861190796, + "step": 1618 + }, + { + "epoch": 0.8001977017175337, + "grad_norm": 1.2994888590220768, + "learning_rate": 1.4028765232493942e-05, + "loss": 0.3388780951499939, + "step": 1619 + }, + { + "epoch": 0.8006919560113679, + "grad_norm": 1.282329812705599, + "learning_rate": 
1.4021285718693581e-05, + "loss": 0.338635116815567, + "step": 1620 + }, + { + "epoch": 0.801186210305202, + "grad_norm": 1.051985157077811, + "learning_rate": 1.4013803520340328e-05, + "loss": 0.26962924003601074, + "step": 1621 + }, + { + "epoch": 0.8016804645990362, + "grad_norm": 1.119736165525956, + "learning_rate": 1.4006318642429194e-05, + "loss": 0.32106393575668335, + "step": 1622 + }, + { + "epoch": 0.8021747188928704, + "grad_norm": 1.1215264874092639, + "learning_rate": 1.399883108995698e-05, + "loss": 0.33063358068466187, + "step": 1623 + }, + { + "epoch": 0.8026689731867046, + "grad_norm": 1.2875541426354853, + "learning_rate": 1.3991340867922266e-05, + "loss": 0.31906163692474365, + "step": 1624 + }, + { + "epoch": 0.8031632274805387, + "grad_norm": 1.0397829646035845, + "learning_rate": 1.3983847981325415e-05, + "loss": 0.2601381242275238, + "step": 1625 + }, + { + "epoch": 0.8036574817743729, + "grad_norm": 1.1557585059548563, + "learning_rate": 1.3976352435168577e-05, + "loss": 0.3342537581920624, + "step": 1626 + }, + { + "epoch": 0.8041517360682071, + "grad_norm": 1.2564737583224261, + "learning_rate": 1.3968854234455669e-05, + "loss": 0.3372059166431427, + "step": 1627 + }, + { + "epoch": 0.8046459903620413, + "grad_norm": 1.1676806235835944, + "learning_rate": 1.3961353384192377e-05, + "loss": 0.31026744842529297, + "step": 1628 + }, + { + "epoch": 0.8051402446558754, + "grad_norm": 1.0921501695742, + "learning_rate": 1.3953849889386173e-05, + "loss": 0.2867652177810669, + "step": 1629 + }, + { + "epoch": 0.8056344989497096, + "grad_norm": 1.1055169200249502, + "learning_rate": 1.3946343755046274e-05, + "loss": 0.29169392585754395, + "step": 1630 + }, + { + "epoch": 0.8061287532435438, + "grad_norm": 1.0753220774925722, + "learning_rate": 1.393883498618367e-05, + "loss": 0.2976510524749756, + "step": 1631 + }, + { + "epoch": 0.806623007537378, + "grad_norm": 1.1387290098549956, + "learning_rate": 1.3931323587811107e-05, + "loss": 
0.2900371551513672, + "step": 1632 + }, + { + "epoch": 0.8071172618312121, + "grad_norm": 1.0560549112494348, + "learning_rate": 1.3923809564943093e-05, + "loss": 0.31660354137420654, + "step": 1633 + }, + { + "epoch": 0.8076115161250463, + "grad_norm": 1.079892158607702, + "learning_rate": 1.3916292922595875e-05, + "loss": 0.3099827468395233, + "step": 1634 + }, + { + "epoch": 0.8081057704188805, + "grad_norm": 1.1450154190444473, + "learning_rate": 1.3908773665787459e-05, + "loss": 0.34322571754455566, + "step": 1635 + }, + { + "epoch": 0.8086000247127146, + "grad_norm": 1.0812992610334402, + "learning_rate": 1.3901251799537592e-05, + "loss": 0.2780989408493042, + "step": 1636 + }, + { + "epoch": 0.8090942790065488, + "grad_norm": 1.1023331343203706, + "learning_rate": 1.389372732886777e-05, + "loss": 0.31049463152885437, + "step": 1637 + }, + { + "epoch": 0.809588533300383, + "grad_norm": 1.0442513462466116, + "learning_rate": 1.3886200258801213e-05, + "loss": 0.29925107955932617, + "step": 1638 + }, + { + "epoch": 0.8100827875942173, + "grad_norm": 1.1039524368767084, + "learning_rate": 1.3878670594362893e-05, + "loss": 0.31893983483314514, + "step": 1639 + }, + { + "epoch": 0.8105770418880514, + "grad_norm": 1.1529184850949745, + "learning_rate": 1.3871138340579502e-05, + "loss": 0.31307080388069153, + "step": 1640 + }, + { + "epoch": 0.8110712961818856, + "grad_norm": 1.068880489111062, + "learning_rate": 1.3863603502479465e-05, + "loss": 0.28198909759521484, + "step": 1641 + }, + { + "epoch": 0.8115655504757198, + "grad_norm": 1.085975037148026, + "learning_rate": 1.3856066085092936e-05, + "loss": 0.28937461972236633, + "step": 1642 + }, + { + "epoch": 0.812059804769554, + "grad_norm": 1.2145338549731968, + "learning_rate": 1.3848526093451789e-05, + "loss": 0.32332292199134827, + "step": 1643 + }, + { + "epoch": 0.8125540590633881, + "grad_norm": 1.1174132019487801, + "learning_rate": 1.3840983532589606e-05, + "loss": 0.3059847056865692, + "step": 1644 + }, 
+ { + "epoch": 0.8130483133572223, + "grad_norm": 1.0961695985122493, + "learning_rate": 1.3833438407541698e-05, + "loss": 0.2939583957195282, + "step": 1645 + }, + { + "epoch": 0.8135425676510565, + "grad_norm": 1.098340825845408, + "learning_rate": 1.3825890723345082e-05, + "loss": 0.3293933868408203, + "step": 1646 + }, + { + "epoch": 0.8140368219448907, + "grad_norm": 1.230371641918686, + "learning_rate": 1.3818340485038488e-05, + "loss": 0.33373600244522095, + "step": 1647 + }, + { + "epoch": 0.8145310762387248, + "grad_norm": 1.0438422749127716, + "learning_rate": 1.3810787697662337e-05, + "loss": 0.2716716527938843, + "step": 1648 + }, + { + "epoch": 0.815025330532559, + "grad_norm": 1.0698510483790238, + "learning_rate": 1.3803232366258774e-05, + "loss": 0.26109835505485535, + "step": 1649 + }, + { + "epoch": 0.8155195848263932, + "grad_norm": 1.1399890180248013, + "learning_rate": 1.3795674495871627e-05, + "loss": 0.3161536753177643, + "step": 1650 + }, + { + "epoch": 0.8160138391202274, + "grad_norm": 1.0602857079391073, + "learning_rate": 1.3788114091546414e-05, + "loss": 0.3078432083129883, + "step": 1651 + }, + { + "epoch": 0.8165080934140615, + "grad_norm": 1.2174185154702881, + "learning_rate": 1.3780551158330364e-05, + "loss": 0.31023627519607544, + "step": 1652 + }, + { + "epoch": 0.8170023477078957, + "grad_norm": 1.2616374354619766, + "learning_rate": 1.3772985701272374e-05, + "loss": 0.3438849151134491, + "step": 1653 + }, + { + "epoch": 0.8174966020017299, + "grad_norm": 1.178467386446937, + "learning_rate": 1.376541772542304e-05, + "loss": 0.31897789239883423, + "step": 1654 + }, + { + "epoch": 0.8179908562955641, + "grad_norm": 1.1125307511503921, + "learning_rate": 1.3757847235834636e-05, + "loss": 0.3101171553134918, + "step": 1655 + }, + { + "epoch": 0.8184851105893982, + "grad_norm": 1.0084998738545823, + "learning_rate": 1.375027423756111e-05, + "loss": 0.28926995396614075, + "step": 1656 + }, + { + "epoch": 0.8189793648832324, + 
"grad_norm": 1.0711136459333532, + "learning_rate": 1.3742698735658087e-05, + "loss": 0.322610080242157, + "step": 1657 + }, + { + "epoch": 0.8194736191770666, + "grad_norm": 1.272869487692088, + "learning_rate": 1.3735120735182865e-05, + "loss": 0.27430039644241333, + "step": 1658 + }, + { + "epoch": 0.8199678734709008, + "grad_norm": 1.217768709412782, + "learning_rate": 1.3727540241194408e-05, + "loss": 0.3091571629047394, + "step": 1659 + }, + { + "epoch": 0.8204621277647349, + "grad_norm": 1.1956758026057746, + "learning_rate": 1.3719957258753347e-05, + "loss": 0.3039378523826599, + "step": 1660 + }, + { + "epoch": 0.8209563820585691, + "grad_norm": 1.0829759157920493, + "learning_rate": 1.371237179292197e-05, + "loss": 0.29711851477622986, + "step": 1661 + }, + { + "epoch": 0.8214506363524033, + "grad_norm": 1.238383962420106, + "learning_rate": 1.370478384876423e-05, + "loss": 0.32411956787109375, + "step": 1662 + }, + { + "epoch": 0.8219448906462374, + "grad_norm": 1.124344248809279, + "learning_rate": 1.3697193431345725e-05, + "loss": 0.2981719672679901, + "step": 1663 + }, + { + "epoch": 0.8224391449400716, + "grad_norm": 1.1755056696925432, + "learning_rate": 1.3689600545733713e-05, + "loss": 0.32756730914115906, + "step": 1664 + }, + { + "epoch": 0.8229333992339058, + "grad_norm": 1.2289352827455349, + "learning_rate": 1.3682005196997094e-05, + "loss": 0.3910979628562927, + "step": 1665 + }, + { + "epoch": 0.82342765352774, + "grad_norm": 1.1421419999727185, + "learning_rate": 1.3674407390206417e-05, + "loss": 0.31716856360435486, + "step": 1666 + }, + { + "epoch": 0.8239219078215742, + "grad_norm": 1.087053372594853, + "learning_rate": 1.3666807130433865e-05, + "loss": 0.31816208362579346, + "step": 1667 + }, + { + "epoch": 0.8244161621154084, + "grad_norm": 1.075967107077939, + "learning_rate": 1.3659204422753265e-05, + "loss": 0.3008955121040344, + "step": 1668 + }, + { + "epoch": 0.8249104164092426, + "grad_norm": 1.0356290376868373, + 
"learning_rate": 1.3651599272240078e-05, + "loss": 0.2957409918308258, + "step": 1669 + }, + { + "epoch": 0.8254046707030768, + "grad_norm": 1.0989692049502364, + "learning_rate": 1.364399168397139e-05, + "loss": 0.33019471168518066, + "step": 1670 + }, + { + "epoch": 0.8258989249969109, + "grad_norm": 1.1429937641963879, + "learning_rate": 1.3636381663025917e-05, + "loss": 0.3532376289367676, + "step": 1671 + }, + { + "epoch": 0.8263931792907451, + "grad_norm": 1.1063068636148639, + "learning_rate": 1.362876921448401e-05, + "loss": 0.2980180084705353, + "step": 1672 + }, + { + "epoch": 0.8268874335845793, + "grad_norm": 1.0418269417044947, + "learning_rate": 1.362115434342762e-05, + "loss": 0.27932479977607727, + "step": 1673 + }, + { + "epoch": 0.8273816878784135, + "grad_norm": 1.1782768860255097, + "learning_rate": 1.3613537054940331e-05, + "loss": 0.2783966064453125, + "step": 1674 + }, + { + "epoch": 0.8278759421722476, + "grad_norm": 1.1803795842967677, + "learning_rate": 1.3605917354107336e-05, + "loss": 0.2957308888435364, + "step": 1675 + }, + { + "epoch": 0.8283701964660818, + "grad_norm": 1.180747940998609, + "learning_rate": 1.3598295246015439e-05, + "loss": 0.31640201807022095, + "step": 1676 + }, + { + "epoch": 0.828864450759916, + "grad_norm": 1.0988354367735653, + "learning_rate": 1.3590670735753047e-05, + "loss": 0.2969709634780884, + "step": 1677 + }, + { + "epoch": 0.8293587050537502, + "grad_norm": 1.1164468460017938, + "learning_rate": 1.3583043828410177e-05, + "loss": 0.34167301654815674, + "step": 1678 + }, + { + "epoch": 0.8298529593475843, + "grad_norm": 1.0956930352290435, + "learning_rate": 1.3575414529078443e-05, + "loss": 0.28540804982185364, + "step": 1679 + }, + { + "epoch": 0.8303472136414185, + "grad_norm": 1.1795719749617215, + "learning_rate": 1.3567782842851054e-05, + "loss": 0.2962091565132141, + "step": 1680 + }, + { + "epoch": 0.8308414679352527, + "grad_norm": 1.1969039130243166, + "learning_rate": 1.3560148774822816e-05, + 
"loss": 0.3650284707546234, + "step": 1681 + }, + { + "epoch": 0.8313357222290869, + "grad_norm": 1.1374534594887609, + "learning_rate": 1.3552512330090126e-05, + "loss": 0.3134267330169678, + "step": 1682 + }, + { + "epoch": 0.831829976522921, + "grad_norm": 1.0921894303145987, + "learning_rate": 1.3544873513750967e-05, + "loss": 0.3020439147949219, + "step": 1683 + }, + { + "epoch": 0.8323242308167552, + "grad_norm": 0.9393581319245673, + "learning_rate": 1.3537232330904895e-05, + "loss": 0.25083282589912415, + "step": 1684 + }, + { + "epoch": 0.8328184851105894, + "grad_norm": 1.2024147558027563, + "learning_rate": 1.3529588786653063e-05, + "loss": 0.33875352144241333, + "step": 1685 + }, + { + "epoch": 0.8333127394044236, + "grad_norm": 1.0620839528979684, + "learning_rate": 1.3521942886098186e-05, + "loss": 0.2717735171318054, + "step": 1686 + }, + { + "epoch": 0.8338069936982577, + "grad_norm": 1.1255995988400895, + "learning_rate": 1.3514294634344562e-05, + "loss": 0.271842896938324, + "step": 1687 + }, + { + "epoch": 0.8343012479920919, + "grad_norm": 1.3262220000473801, + "learning_rate": 1.3506644036498054e-05, + "loss": 0.29420506954193115, + "step": 1688 + }, + { + "epoch": 0.8347955022859261, + "grad_norm": 1.338127401529371, + "learning_rate": 1.349899109766609e-05, + "loss": 0.3336431682109833, + "step": 1689 + }, + { + "epoch": 0.8352897565797602, + "grad_norm": 1.0514224360912943, + "learning_rate": 1.3491335822957665e-05, + "loss": 0.2848295569419861, + "step": 1690 + }, + { + "epoch": 0.8357840108735944, + "grad_norm": 1.1721842125626762, + "learning_rate": 1.3483678217483327e-05, + "loss": 0.3164542019367218, + "step": 1691 + }, + { + "epoch": 0.8362782651674286, + "grad_norm": 1.1542823329984544, + "learning_rate": 1.3476018286355189e-05, + "loss": 0.3030688762664795, + "step": 1692 + }, + { + "epoch": 0.8367725194612629, + "grad_norm": 1.3329503320081877, + "learning_rate": 1.3468356034686912e-05, + "loss": 0.30218198895454407, + "step": 1693 
+ }, + { + "epoch": 0.837266773755097, + "grad_norm": 1.1429497129560076, + "learning_rate": 1.3460691467593697e-05, + "loss": 0.3327499032020569, + "step": 1694 + }, + { + "epoch": 0.8377610280489312, + "grad_norm": 1.2198627663252626, + "learning_rate": 1.3453024590192307e-05, + "loss": 0.29298892617225647, + "step": 1695 + }, + { + "epoch": 0.8382552823427654, + "grad_norm": 1.238368209416205, + "learning_rate": 1.344535540760104e-05, + "loss": 0.3096858859062195, + "step": 1696 + }, + { + "epoch": 0.8387495366365996, + "grad_norm": 1.1297510733547198, + "learning_rate": 1.3437683924939731e-05, + "loss": 0.30680233240127563, + "step": 1697 + }, + { + "epoch": 0.8392437909304337, + "grad_norm": 1.13902422944666, + "learning_rate": 1.3430010147329752e-05, + "loss": 0.3139989972114563, + "step": 1698 + }, + { + "epoch": 0.8397380452242679, + "grad_norm": 1.132396621648215, + "learning_rate": 1.3422334079894008e-05, + "loss": 0.30418652296066284, + "step": 1699 + }, + { + "epoch": 0.8402322995181021, + "grad_norm": 1.228592620621731, + "learning_rate": 1.3414655727756931e-05, + "loss": 0.31245100498199463, + "step": 1700 + }, + { + "epoch": 0.8407265538119363, + "grad_norm": 1.1908375195801162, + "learning_rate": 1.3406975096044477e-05, + "loss": 0.3381880223751068, + "step": 1701 + }, + { + "epoch": 0.8412208081057704, + "grad_norm": 1.2009611203254438, + "learning_rate": 1.3399292189884135e-05, + "loss": 0.3359968960285187, + "step": 1702 + }, + { + "epoch": 0.8417150623996046, + "grad_norm": 1.2698512981575327, + "learning_rate": 1.3391607014404891e-05, + "loss": 0.3320350646972656, + "step": 1703 + }, + { + "epoch": 0.8422093166934388, + "grad_norm": 1.1615181813433448, + "learning_rate": 1.3383919574737267e-05, + "loss": 0.32830795645713806, + "step": 1704 + }, + { + "epoch": 0.842703570987273, + "grad_norm": 0.9808785682252426, + "learning_rate": 1.3376229876013285e-05, + "loss": 0.255840927362442, + "step": 1705 + }, + { + "epoch": 0.8431978252811071, + 
"grad_norm": 1.0739012833500008, + "learning_rate": 1.3368537923366476e-05, + "loss": 0.3110755681991577, + "step": 1706 + }, + { + "epoch": 0.8436920795749413, + "grad_norm": 1.0815136095330147, + "learning_rate": 1.336084372193188e-05, + "loss": 0.28063881397247314, + "step": 1707 + }, + { + "epoch": 0.8441863338687755, + "grad_norm": 1.1539434345644544, + "learning_rate": 1.3353147276846042e-05, + "loss": 0.31297358870506287, + "step": 1708 + }, + { + "epoch": 0.8446805881626097, + "grad_norm": 1.155638509555895, + "learning_rate": 1.3345448593246986e-05, + "loss": 0.30750149488449097, + "step": 1709 + }, + { + "epoch": 0.8451748424564438, + "grad_norm": 1.0259778822912606, + "learning_rate": 1.333774767627425e-05, + "loss": 0.2665224075317383, + "step": 1710 + }, + { + "epoch": 0.845669096750278, + "grad_norm": 1.0618832452009934, + "learning_rate": 1.3330044531068858e-05, + "loss": 0.28920280933380127, + "step": 1711 + }, + { + "epoch": 0.8461633510441122, + "grad_norm": 1.0688762844449171, + "learning_rate": 1.332233916277332e-05, + "loss": 0.2678643465042114, + "step": 1712 + }, + { + "epoch": 0.8466576053379464, + "grad_norm": 1.1389370638959122, + "learning_rate": 1.3314631576531623e-05, + "loss": 0.33682242035865784, + "step": 1713 + }, + { + "epoch": 0.8471518596317805, + "grad_norm": 1.2088936099945806, + "learning_rate": 1.330692177748925e-05, + "loss": 0.36704546213150024, + "step": 1714 + }, + { + "epoch": 0.8476461139256147, + "grad_norm": 1.0972613113130176, + "learning_rate": 1.3299209770793144e-05, + "loss": 0.3183630108833313, + "step": 1715 + }, + { + "epoch": 0.8481403682194489, + "grad_norm": 1.0799352919589156, + "learning_rate": 1.3291495561591736e-05, + "loss": 0.27138596773147583, + "step": 1716 + }, + { + "epoch": 0.848634622513283, + "grad_norm": 1.1332588592044, + "learning_rate": 1.3283779155034925e-05, + "loss": 0.30252328515052795, + "step": 1717 + }, + { + "epoch": 0.8491288768071172, + "grad_norm": 1.1212549613542353, + 
"learning_rate": 1.3276060556274067e-05, + "loss": 0.29494598507881165, + "step": 1718 + }, + { + "epoch": 0.8496231311009514, + "grad_norm": 1.01148770717553, + "learning_rate": 1.3268339770461988e-05, + "loss": 0.2822422981262207, + "step": 1719 + }, + { + "epoch": 0.8501173853947857, + "grad_norm": 1.134036508201843, + "learning_rate": 1.3260616802752979e-05, + "loss": 0.3348005712032318, + "step": 1720 + }, + { + "epoch": 0.8506116396886197, + "grad_norm": 1.171053745899539, + "learning_rate": 1.3252891658302782e-05, + "loss": 0.3146229088306427, + "step": 1721 + }, + { + "epoch": 0.851105893982454, + "grad_norm": 1.1635384669674214, + "learning_rate": 1.3245164342268592e-05, + "loss": 0.34189414978027344, + "step": 1722 + }, + { + "epoch": 0.8516001482762882, + "grad_norm": 1.0403207041973201, + "learning_rate": 1.3237434859809055e-05, + "loss": 0.2967323958873749, + "step": 1723 + }, + { + "epoch": 0.8520944025701224, + "grad_norm": 1.1011411329678815, + "learning_rate": 1.3229703216084262e-05, + "loss": 0.329689085483551, + "step": 1724 + }, + { + "epoch": 0.8525886568639565, + "grad_norm": 1.1910259713127598, + "learning_rate": 1.3221969416255751e-05, + "loss": 0.33041107654571533, + "step": 1725 + }, + { + "epoch": 0.8530829111577907, + "grad_norm": 1.144468406694428, + "learning_rate": 1.321423346548649e-05, + "loss": 0.30197203159332275, + "step": 1726 + }, + { + "epoch": 0.8535771654516249, + "grad_norm": 1.1709857904248526, + "learning_rate": 1.3206495368940897e-05, + "loss": 0.29060906171798706, + "step": 1727 + }, + { + "epoch": 0.8540714197454591, + "grad_norm": 1.1769143322358042, + "learning_rate": 1.3198755131784808e-05, + "loss": 0.3119436502456665, + "step": 1728 + }, + { + "epoch": 0.8545656740392932, + "grad_norm": 1.1825299188260439, + "learning_rate": 1.31910127591855e-05, + "loss": 0.35256415605545044, + "step": 1729 + }, + { + "epoch": 0.8550599283331274, + "grad_norm": 1.169751710502227, + "learning_rate": 1.3183268256311665e-05, + 
"loss": 0.3093785345554352, + "step": 1730 + }, + { + "epoch": 0.8555541826269616, + "grad_norm": 1.0555303314758304, + "learning_rate": 1.317552162833343e-05, + "loss": 0.2713086009025574, + "step": 1731 + }, + { + "epoch": 0.8560484369207958, + "grad_norm": 1.1667835049569328, + "learning_rate": 1.3167772880422325e-05, + "loss": 0.3135699927806854, + "step": 1732 + }, + { + "epoch": 0.8565426912146299, + "grad_norm": 1.2127716623193672, + "learning_rate": 1.3160022017751308e-05, + "loss": 0.3077283501625061, + "step": 1733 + }, + { + "epoch": 0.8570369455084641, + "grad_norm": 1.0914461784602205, + "learning_rate": 1.3152269045494744e-05, + "loss": 0.2900918424129486, + "step": 1734 + }, + { + "epoch": 0.8575311998022983, + "grad_norm": 1.1010374385853228, + "learning_rate": 1.3144513968828406e-05, + "loss": 0.30828869342803955, + "step": 1735 + }, + { + "epoch": 0.8580254540961325, + "grad_norm": 1.2038482894608615, + "learning_rate": 1.3136756792929469e-05, + "loss": 0.32526400685310364, + "step": 1736 + }, + { + "epoch": 0.8585197083899666, + "grad_norm": 1.2033734524328428, + "learning_rate": 1.3128997522976518e-05, + "loss": 0.35023608803749084, + "step": 1737 + }, + { + "epoch": 0.8590139626838008, + "grad_norm": 1.0100870731750684, + "learning_rate": 1.312123616414953e-05, + "loss": 0.27287641167640686, + "step": 1738 + }, + { + "epoch": 0.859508216977635, + "grad_norm": 1.1797907328737691, + "learning_rate": 1.3113472721629871e-05, + "loss": 0.346009761095047, + "step": 1739 + }, + { + "epoch": 0.8600024712714691, + "grad_norm": 1.0724791595798373, + "learning_rate": 1.3105707200600312e-05, + "loss": 0.3297504186630249, + "step": 1740 + }, + { + "epoch": 0.8604967255653033, + "grad_norm": 1.1244989642514696, + "learning_rate": 1.3097939606245005e-05, + "loss": 0.29835087060928345, + "step": 1741 + }, + { + "epoch": 0.8609909798591375, + "grad_norm": 1.1715549927893771, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.31466037034988403, + "step": 
1742 + }, + { + "epoch": 0.8614852341529717, + "grad_norm": 1.1357283105102867, + "learning_rate": 1.3082398218300646e-05, + "loss": 0.32722294330596924, + "step": 1743 + }, + { + "epoch": 0.8619794884468058, + "grad_norm": 1.0679826147860039, + "learning_rate": 1.3074624435086809e-05, + "loss": 0.2603963613510132, + "step": 1744 + }, + { + "epoch": 0.86247374274064, + "grad_norm": 1.3274641459704042, + "learning_rate": 1.3066848599297633e-05, + "loss": 0.3100607991218567, + "step": 1745 + }, + { + "epoch": 0.8629679970344742, + "grad_norm": 1.0941880035602098, + "learning_rate": 1.3059070716124145e-05, + "loss": 0.2772334814071655, + "step": 1746 + }, + { + "epoch": 0.8634622513283084, + "grad_norm": 1.0597299652706509, + "learning_rate": 1.305129079075876e-05, + "loss": 0.3097267746925354, + "step": 1747 + }, + { + "epoch": 0.8639565056221425, + "grad_norm": 0.9828148026871187, + "learning_rate": 1.304350882839524e-05, + "loss": 0.24734097719192505, + "step": 1748 + }, + { + "epoch": 0.8644507599159768, + "grad_norm": 1.1998311484351691, + "learning_rate": 1.3035724834228713e-05, + "loss": 0.32148587703704834, + "step": 1749 + }, + { + "epoch": 0.864945014209811, + "grad_norm": 1.0741747588028856, + "learning_rate": 1.3027938813455663e-05, + "loss": 0.3037404417991638, + "step": 1750 + }, + { + "epoch": 0.8654392685036452, + "grad_norm": 1.011225470292042, + "learning_rate": 1.3020150771273925e-05, + "loss": 0.30760154128074646, + "step": 1751 + }, + { + "epoch": 0.8659335227974793, + "grad_norm": 1.2184140511087935, + "learning_rate": 1.3012360712882681e-05, + "loss": 0.3169519305229187, + "step": 1752 + }, + { + "epoch": 0.8664277770913135, + "grad_norm": 1.1637013896789081, + "learning_rate": 1.300456864348247e-05, + "loss": 0.32497861981391907, + "step": 1753 + }, + { + "epoch": 0.8669220313851477, + "grad_norm": 1.1775374726585146, + "learning_rate": 1.2996774568275163e-05, + "loss": 0.3318047821521759, + "step": 1754 + }, + { + "epoch": 0.8674162856789819, 
+ "grad_norm": 1.1616476146855954, + "learning_rate": 1.298897849246397e-05, + "loss": 0.32553863525390625, + "step": 1755 + }, + { + "epoch": 0.867910539972816, + "grad_norm": 1.2503555115463478, + "learning_rate": 1.2981180421253446e-05, + "loss": 0.36457520723342896, + "step": 1756 + }, + { + "epoch": 0.8684047942666502, + "grad_norm": 1.169094604095011, + "learning_rate": 1.2973380359849466e-05, + "loss": 0.3038361668586731, + "step": 1757 + }, + { + "epoch": 0.8688990485604844, + "grad_norm": 1.1027242700855706, + "learning_rate": 1.2965578313459246e-05, + "loss": 0.3219846785068512, + "step": 1758 + }, + { + "epoch": 0.8693933028543186, + "grad_norm": 1.1142620036406827, + "learning_rate": 1.2957774287291311e-05, + "loss": 0.3180781304836273, + "step": 1759 + }, + { + "epoch": 0.8698875571481527, + "grad_norm": 1.0882143758359024, + "learning_rate": 1.2949968286555527e-05, + "loss": 0.27302947640419006, + "step": 1760 + }, + { + "epoch": 0.8703818114419869, + "grad_norm": 1.1731384509972838, + "learning_rate": 1.2942160316463066e-05, + "loss": 0.31756314635276794, + "step": 1761 + }, + { + "epoch": 0.8708760657358211, + "grad_norm": 1.1248252560155063, + "learning_rate": 1.2934350382226412e-05, + "loss": 0.2921680510044098, + "step": 1762 + }, + { + "epoch": 0.8713703200296553, + "grad_norm": 1.2402414216219324, + "learning_rate": 1.2926538489059373e-05, + "loss": 0.36426985263824463, + "step": 1763 + }, + { + "epoch": 0.8718645743234894, + "grad_norm": 1.1314972333554063, + "learning_rate": 1.2918724642177054e-05, + "loss": 0.31873831152915955, + "step": 1764 + }, + { + "epoch": 0.8723588286173236, + "grad_norm": 1.0875738556359984, + "learning_rate": 1.2910908846795867e-05, + "loss": 0.30952733755111694, + "step": 1765 + }, + { + "epoch": 0.8728530829111578, + "grad_norm": 1.149047421683754, + "learning_rate": 1.2903091108133523e-05, + "loss": 0.33339035511016846, + "step": 1766 + }, + { + "epoch": 0.8733473372049919, + "grad_norm": 1.1272534729456736, + 
"learning_rate": 1.2895271431409038e-05, + "loss": 0.31531351804733276, + "step": 1767 + }, + { + "epoch": 0.8738415914988261, + "grad_norm": 1.0832848102714157, + "learning_rate": 1.2887449821842713e-05, + "loss": 0.3016526401042938, + "step": 1768 + }, + { + "epoch": 0.8743358457926603, + "grad_norm": 1.1203275148695215, + "learning_rate": 1.2879626284656141e-05, + "loss": 0.3364630341529846, + "step": 1769 + }, + { + "epoch": 0.8748301000864945, + "grad_norm": 1.049317791331816, + "learning_rate": 1.287180082507221e-05, + "loss": 0.29755398631095886, + "step": 1770 + }, + { + "epoch": 0.8753243543803286, + "grad_norm": 1.1616312337400467, + "learning_rate": 1.286397344831508e-05, + "loss": 0.2986103892326355, + "step": 1771 + }, + { + "epoch": 0.8758186086741628, + "grad_norm": 1.0876002245947722, + "learning_rate": 1.2856144159610197e-05, + "loss": 0.31291434168815613, + "step": 1772 + }, + { + "epoch": 0.876312862967997, + "grad_norm": 1.0995747953439883, + "learning_rate": 1.2848312964184283e-05, + "loss": 0.28285568952560425, + "step": 1773 + }, + { + "epoch": 0.8768071172618312, + "grad_norm": 1.1726581514839194, + "learning_rate": 1.2840479867265331e-05, + "loss": 0.3319891095161438, + "step": 1774 + }, + { + "epoch": 0.8773013715556653, + "grad_norm": 1.1459648615093148, + "learning_rate": 1.2832644874082604e-05, + "loss": 0.3265117406845093, + "step": 1775 + }, + { + "epoch": 0.8777956258494996, + "grad_norm": 1.1247446577558389, + "learning_rate": 1.2824807989866635e-05, + "loss": 0.32061511278152466, + "step": 1776 + }, + { + "epoch": 0.8782898801433338, + "grad_norm": 1.2889890707472673, + "learning_rate": 1.2816969219849214e-05, + "loss": 0.34278666973114014, + "step": 1777 + }, + { + "epoch": 0.878784134437168, + "grad_norm": 1.0560963427574246, + "learning_rate": 1.2809128569263387e-05, + "loss": 0.28335195779800415, + "step": 1778 + }, + { + "epoch": 0.8792783887310021, + "grad_norm": 1.261751311219404, + "learning_rate": 1.2801286043343468e-05, + 
"loss": 0.35037046670913696, + "step": 1779 + }, + { + "epoch": 0.8797726430248363, + "grad_norm": 1.2110241423671546, + "learning_rate": 1.2793441647325012e-05, + "loss": 0.30058878660202026, + "step": 1780 + }, + { + "epoch": 0.8802668973186705, + "grad_norm": 1.1234244113929972, + "learning_rate": 1.2785595386444824e-05, + "loss": 0.29526466131210327, + "step": 1781 + }, + { + "epoch": 0.8807611516125047, + "grad_norm": 1.128737037655087, + "learning_rate": 1.2777747265940956e-05, + "loss": 0.3194332718849182, + "step": 1782 + }, + { + "epoch": 0.8812554059063388, + "grad_norm": 1.1751557862784823, + "learning_rate": 1.2769897291052709e-05, + "loss": 0.33527326583862305, + "step": 1783 + }, + { + "epoch": 0.881749660200173, + "grad_norm": 1.1396387575620477, + "learning_rate": 1.2762045467020601e-05, + "loss": 0.3277815580368042, + "step": 1784 + }, + { + "epoch": 0.8822439144940072, + "grad_norm": 1.2472488401817894, + "learning_rate": 1.2754191799086406e-05, + "loss": 0.31030380725860596, + "step": 1785 + }, + { + "epoch": 0.8827381687878414, + "grad_norm": 1.2316289072611675, + "learning_rate": 1.274633629249312e-05, + "loss": 0.34496408700942993, + "step": 1786 + }, + { + "epoch": 0.8832324230816755, + "grad_norm": 1.1907817971144352, + "learning_rate": 1.2738478952484964e-05, + "loss": 0.31008201837539673, + "step": 1787 + }, + { + "epoch": 0.8837266773755097, + "grad_norm": 1.1874449425538405, + "learning_rate": 1.2730619784307388e-05, + "loss": 0.35956043004989624, + "step": 1788 + }, + { + "epoch": 0.8842209316693439, + "grad_norm": 1.075617061625236, + "learning_rate": 1.272275879320706e-05, + "loss": 0.2944573760032654, + "step": 1789 + }, + { + "epoch": 0.8847151859631781, + "grad_norm": 1.0739187183942678, + "learning_rate": 1.2714895984431863e-05, + "loss": 0.2941366136074066, + "step": 1790 + }, + { + "epoch": 0.8852094402570122, + "grad_norm": 1.3130838842625934, + "learning_rate": 1.2707031363230901e-05, + "loss": 0.34683144092559814, + "step": 
1791 + }, + { + "epoch": 0.8857036945508464, + "grad_norm": 1.1309361657268096, + "learning_rate": 1.2699164934854475e-05, + "loss": 0.3014514744281769, + "step": 1792 + }, + { + "epoch": 0.8861979488446806, + "grad_norm": 1.1642635843186193, + "learning_rate": 1.2691296704554112e-05, + "loss": 0.2749955654144287, + "step": 1793 + }, + { + "epoch": 0.8866922031385147, + "grad_norm": 1.2406193113190336, + "learning_rate": 1.2683426677582518e-05, + "loss": 0.3707960844039917, + "step": 1794 + }, + { + "epoch": 0.8871864574323489, + "grad_norm": 1.098057655891237, + "learning_rate": 1.2675554859193615e-05, + "loss": 0.3122541606426239, + "step": 1795 + }, + { + "epoch": 0.8876807117261831, + "grad_norm": 1.1564617646628, + "learning_rate": 1.2667681254642521e-05, + "loss": 0.3072753846645355, + "step": 1796 + }, + { + "epoch": 0.8881749660200173, + "grad_norm": 1.1000251936377918, + "learning_rate": 1.2659805869185534e-05, + "loss": 0.27002331614494324, + "step": 1797 + }, + { + "epoch": 0.8886692203138514, + "grad_norm": 1.0649961261949041, + "learning_rate": 1.2651928708080155e-05, + "loss": 0.2775167226791382, + "step": 1798 + }, + { + "epoch": 0.8891634746076856, + "grad_norm": 1.0134446617324497, + "learning_rate": 1.2644049776585061e-05, + "loss": 0.30023425817489624, + "step": 1799 + }, + { + "epoch": 0.8896577289015198, + "grad_norm": 1.0536326288458973, + "learning_rate": 1.2636169079960116e-05, + "loss": 0.29491451382637024, + "step": 1800 + }, + { + "epoch": 0.890151983195354, + "grad_norm": 1.1393442237009457, + "learning_rate": 1.2628286623466359e-05, + "loss": 0.3069722652435303, + "step": 1801 + }, + { + "epoch": 0.8906462374891881, + "grad_norm": 1.0432479678380786, + "learning_rate": 1.2620402412366006e-05, + "loss": 0.30594444274902344, + "step": 1802 + }, + { + "epoch": 0.8911404917830223, + "grad_norm": 1.224543789313884, + "learning_rate": 1.2612516451922442e-05, + "loss": 0.278346985578537, + "step": 1803 + }, + { + "epoch": 0.8916347460768566, + 
"grad_norm": 1.2157715105375322, + "learning_rate": 1.2604628747400227e-05, + "loss": 0.2985970973968506, + "step": 1804 + }, + { + "epoch": 0.8921290003706908, + "grad_norm": 1.1226040901686842, + "learning_rate": 1.259673930406507e-05, + "loss": 0.31054627895355225, + "step": 1805 + }, + { + "epoch": 0.8926232546645249, + "grad_norm": 1.1289150487077437, + "learning_rate": 1.258884812718386e-05, + "loss": 0.28903907537460327, + "step": 1806 + }, + { + "epoch": 0.8931175089583591, + "grad_norm": 1.132448586420589, + "learning_rate": 1.258095522202463e-05, + "loss": 0.2937915027141571, + "step": 1807 + }, + { + "epoch": 0.8936117632521933, + "grad_norm": 1.251676196860064, + "learning_rate": 1.257306059385657e-05, + "loss": 0.3038950562477112, + "step": 1808 + }, + { + "epoch": 0.8941060175460275, + "grad_norm": 1.235431629529867, + "learning_rate": 1.2565164247950023e-05, + "loss": 0.3081057071685791, + "step": 1809 + }, + { + "epoch": 0.8946002718398616, + "grad_norm": 1.1023391071403523, + "learning_rate": 1.2557266189576478e-05, + "loss": 0.2608702480792999, + "step": 1810 + }, + { + "epoch": 0.8950945261336958, + "grad_norm": 1.1733196648921136, + "learning_rate": 1.254936642400856e-05, + "loss": 0.2829548120498657, + "step": 1811 + }, + { + "epoch": 0.89558878042753, + "grad_norm": 1.1986500261226571, + "learning_rate": 1.2541464956520045e-05, + "loss": 0.3157985508441925, + "step": 1812 + }, + { + "epoch": 0.8960830347213642, + "grad_norm": 1.1805347109907394, + "learning_rate": 1.2533561792385837e-05, + "loss": 0.2948974370956421, + "step": 1813 + }, + { + "epoch": 0.8965772890151983, + "grad_norm": 1.1460952132203441, + "learning_rate": 1.252565693688198e-05, + "loss": 0.3011903166770935, + "step": 1814 + }, + { + "epoch": 0.8970715433090325, + "grad_norm": 1.3055245186221631, + "learning_rate": 1.2517750395285635e-05, + "loss": 0.3570353388786316, + "step": 1815 + }, + { + "epoch": 0.8975657976028667, + "grad_norm": 1.1337741379781219, + "learning_rate": 
1.2509842172875105e-05, + "loss": 0.30166712403297424, + "step": 1816 + }, + { + "epoch": 0.8980600518967009, + "grad_norm": 1.104423129790351, + "learning_rate": 1.2501932274929797e-05, + "loss": 0.3260636329650879, + "step": 1817 + }, + { + "epoch": 0.898554306190535, + "grad_norm": 1.0975906386988825, + "learning_rate": 1.2494020706730251e-05, + "loss": 0.31647035479545593, + "step": 1818 + }, + { + "epoch": 0.8990485604843692, + "grad_norm": 1.2099925292750648, + "learning_rate": 1.2486107473558118e-05, + "loss": 0.3059273064136505, + "step": 1819 + }, + { + "epoch": 0.8995428147782034, + "grad_norm": 1.1238527206258473, + "learning_rate": 1.247819258069616e-05, + "loss": 0.31050577759742737, + "step": 1820 + }, + { + "epoch": 0.9000370690720375, + "grad_norm": 1.167261413544568, + "learning_rate": 1.2470276033428241e-05, + "loss": 0.3199779689311981, + "step": 1821 + }, + { + "epoch": 0.9005313233658717, + "grad_norm": 1.1634621252313533, + "learning_rate": 1.2462357837039338e-05, + "loss": 0.31346091628074646, + "step": 1822 + }, + { + "epoch": 0.9010255776597059, + "grad_norm": 1.7712393639688087, + "learning_rate": 1.245443799681553e-05, + "loss": 0.31128326058387756, + "step": 1823 + }, + { + "epoch": 0.9015198319535401, + "grad_norm": 1.0665988205220116, + "learning_rate": 1.244651651804398e-05, + "loss": 0.27540329098701477, + "step": 1824 + }, + { + "epoch": 0.9020140862473742, + "grad_norm": 1.08908725997666, + "learning_rate": 1.243859340601296e-05, + "loss": 0.2613363265991211, + "step": 1825 + }, + { + "epoch": 0.9025083405412084, + "grad_norm": 1.1499718586586674, + "learning_rate": 1.2430668666011825e-05, + "loss": 0.30530184507369995, + "step": 1826 + }, + { + "epoch": 0.9030025948350426, + "grad_norm": 1.0907140946424856, + "learning_rate": 1.2422742303331022e-05, + "loss": 0.3223349153995514, + "step": 1827 + }, + { + "epoch": 0.9034968491288768, + "grad_norm": 1.131086049145241, + "learning_rate": 1.2414814323262067e-05, + "loss": 
0.32017287611961365, + "step": 1828 + }, + { + "epoch": 0.9039911034227109, + "grad_norm": 1.2183101338845472, + "learning_rate": 1.2406884731097582e-05, + "loss": 0.2965891361236572, + "step": 1829 + }, + { + "epoch": 0.9044853577165451, + "grad_norm": 1.535326476461108, + "learning_rate": 1.2398953532131235e-05, + "loss": 0.3517727851867676, + "step": 1830 + }, + { + "epoch": 0.9049796120103794, + "grad_norm": 1.0055415215772612, + "learning_rate": 1.2391020731657788e-05, + "loss": 0.26107311248779297, + "step": 1831 + }, + { + "epoch": 0.9054738663042136, + "grad_norm": 1.16405975535122, + "learning_rate": 1.2383086334973065e-05, + "loss": 0.31327998638153076, + "step": 1832 + }, + { + "epoch": 0.9059681205980477, + "grad_norm": 1.1376729658041929, + "learning_rate": 1.2375150347373956e-05, + "loss": 0.2708127498626709, + "step": 1833 + }, + { + "epoch": 0.9064623748918819, + "grad_norm": 1.2578266997569258, + "learning_rate": 1.236721277415841e-05, + "loss": 0.3264025151729584, + "step": 1834 + }, + { + "epoch": 0.9069566291857161, + "grad_norm": 1.1552886471917594, + "learning_rate": 1.2359273620625438e-05, + "loss": 0.3226723074913025, + "step": 1835 + }, + { + "epoch": 0.9074508834795503, + "grad_norm": 1.095230882373492, + "learning_rate": 1.2351332892075109e-05, + "loss": 0.2895771861076355, + "step": 1836 + }, + { + "epoch": 0.9079451377733844, + "grad_norm": 1.149733162695983, + "learning_rate": 1.234339059380854e-05, + "loss": 0.3316076397895813, + "step": 1837 + }, + { + "epoch": 0.9084393920672186, + "grad_norm": 1.10037368979265, + "learning_rate": 1.2335446731127887e-05, + "loss": 0.29858651757240295, + "step": 1838 + }, + { + "epoch": 0.9089336463610528, + "grad_norm": 1.2759313559643695, + "learning_rate": 1.2327501309336371e-05, + "loss": 0.31340792775154114, + "step": 1839 + }, + { + "epoch": 0.909427900654887, + "grad_norm": 1.038203202123546, + "learning_rate": 1.2319554333738236e-05, + "loss": 0.27344945073127747, + "step": 1840 + }, + { + 
"epoch": 0.9099221549487211, + "grad_norm": 1.1811761633875792, + "learning_rate": 1.2311605809638766e-05, + "loss": 0.27349725365638733, + "step": 1841 + }, + { + "epoch": 0.9104164092425553, + "grad_norm": 1.2931266398373575, + "learning_rate": 1.2303655742344292e-05, + "loss": 0.28933316469192505, + "step": 1842 + }, + { + "epoch": 0.9109106635363895, + "grad_norm": 1.1360201134878805, + "learning_rate": 1.2295704137162158e-05, + "loss": 0.3315466344356537, + "step": 1843 + }, + { + "epoch": 0.9114049178302237, + "grad_norm": 1.3735184410271417, + "learning_rate": 1.2287750999400743e-05, + "loss": 0.3227408528327942, + "step": 1844 + }, + { + "epoch": 0.9118991721240578, + "grad_norm": 1.1237568254849295, + "learning_rate": 1.2279796334369447e-05, + "loss": 0.30476877093315125, + "step": 1845 + }, + { + "epoch": 0.912393426417892, + "grad_norm": 1.1863082805694927, + "learning_rate": 1.2271840147378697e-05, + "loss": 0.29941046237945557, + "step": 1846 + }, + { + "epoch": 0.9128876807117262, + "grad_norm": 1.040665730868043, + "learning_rate": 1.2263882443739923e-05, + "loss": 0.26635122299194336, + "step": 1847 + }, + { + "epoch": 0.9133819350055603, + "grad_norm": 1.2009768589181191, + "learning_rate": 1.2255923228765574e-05, + "loss": 0.32384809851646423, + "step": 1848 + }, + { + "epoch": 0.9138761892993945, + "grad_norm": 1.1005403546735195, + "learning_rate": 1.2247962507769113e-05, + "loss": 0.2830178141593933, + "step": 1849 + }, + { + "epoch": 0.9143704435932287, + "grad_norm": 1.146384025635135, + "learning_rate": 1.2240000286065003e-05, + "loss": 0.32860931754112244, + "step": 1850 + }, + { + "epoch": 0.9148646978870629, + "grad_norm": 1.1448106720128721, + "learning_rate": 1.2232036568968703e-05, + "loss": 0.2820647954940796, + "step": 1851 + }, + { + "epoch": 0.915358952180897, + "grad_norm": 1.2180250787611469, + "learning_rate": 1.2224071361796685e-05, + "loss": 0.3368694484233856, + "step": 1852 + }, + { + "epoch": 0.9158532064747312, + 
"grad_norm": 1.1414152376911786, + "learning_rate": 1.2216104669866405e-05, + "loss": 0.32594096660614014, + "step": 1853 + }, + { + "epoch": 0.9163474607685654, + "grad_norm": 1.129839927585001, + "learning_rate": 1.2208136498496307e-05, + "loss": 0.3383556008338928, + "step": 1854 + }, + { + "epoch": 0.9168417150623996, + "grad_norm": 1.1516691565096748, + "learning_rate": 1.2200166853005837e-05, + "loss": 0.2655363976955414, + "step": 1855 + }, + { + "epoch": 0.9173359693562337, + "grad_norm": 1.067780593954706, + "learning_rate": 1.2192195738715414e-05, + "loss": 0.30512773990631104, + "step": 1856 + }, + { + "epoch": 0.917830223650068, + "grad_norm": 1.3304322049937938, + "learning_rate": 1.2184223160946433e-05, + "loss": 0.34026995301246643, + "step": 1857 + }, + { + "epoch": 0.9183244779439022, + "grad_norm": 1.2079696433735554, + "learning_rate": 1.2176249125021281e-05, + "loss": 0.29324328899383545, + "step": 1858 + }, + { + "epoch": 0.9188187322377364, + "grad_norm": 1.454623275441196, + "learning_rate": 1.2168273636263308e-05, + "loss": 0.3114206790924072, + "step": 1859 + }, + { + "epoch": 0.9193129865315705, + "grad_norm": 1.1301917440411622, + "learning_rate": 1.2160296699996839e-05, + "loss": 0.2829141914844513, + "step": 1860 + }, + { + "epoch": 0.9198072408254047, + "grad_norm": 1.0721269081592821, + "learning_rate": 1.2152318321547156e-05, + "loss": 0.2735600769519806, + "step": 1861 + }, + { + "epoch": 0.9203014951192389, + "grad_norm": 1.0465335380212768, + "learning_rate": 1.2144338506240519e-05, + "loss": 0.3160930573940277, + "step": 1862 + }, + { + "epoch": 0.9207957494130731, + "grad_norm": 1.0735769631967078, + "learning_rate": 1.2136357259404128e-05, + "loss": 0.26677393913269043, + "step": 1863 + }, + { + "epoch": 0.9212900037069072, + "grad_norm": 1.1305004585474958, + "learning_rate": 1.2128374586366159e-05, + "loss": 0.33033064007759094, + "step": 1864 + }, + { + "epoch": 0.9217842580007414, + "grad_norm": 1.1210908469065626, + 
"learning_rate": 1.2120390492455727e-05, + "loss": 0.28271663188934326, + "step": 1865 + }, + { + "epoch": 0.9222785122945756, + "grad_norm": 1.1196923913120616, + "learning_rate": 1.21124049830029e-05, + "loss": 0.3116013705730438, + "step": 1866 + }, + { + "epoch": 0.9227727665884098, + "grad_norm": 1.1258678919425735, + "learning_rate": 1.2104418063338686e-05, + "loss": 0.30614158511161804, + "step": 1867 + }, + { + "epoch": 0.9232670208822439, + "grad_norm": 1.2128311616527454, + "learning_rate": 1.2096429738795041e-05, + "loss": 0.34351983666419983, + "step": 1868 + }, + { + "epoch": 0.9237612751760781, + "grad_norm": 1.2814647055659063, + "learning_rate": 1.2088440014704858e-05, + "loss": 0.31006965041160583, + "step": 1869 + }, + { + "epoch": 0.9242555294699123, + "grad_norm": 1.093225958461299, + "learning_rate": 1.2080448896401964e-05, + "loss": 0.2671147584915161, + "step": 1870 + }, + { + "epoch": 0.9247497837637465, + "grad_norm": 1.2298582810409653, + "learning_rate": 1.207245638922111e-05, + "loss": 0.29123416543006897, + "step": 1871 + }, + { + "epoch": 0.9252440380575806, + "grad_norm": 1.1613532669189326, + "learning_rate": 1.2064462498497984e-05, + "loss": 0.31838539242744446, + "step": 1872 + }, + { + "epoch": 0.9257382923514148, + "grad_norm": 1.1861407153761483, + "learning_rate": 1.205646722956919e-05, + "loss": 0.3158906102180481, + "step": 1873 + }, + { + "epoch": 0.926232546645249, + "grad_norm": 1.2339017273841688, + "learning_rate": 1.2048470587772257e-05, + "loss": 0.3679552674293518, + "step": 1874 + }, + { + "epoch": 0.9267268009390831, + "grad_norm": 1.1210108605660978, + "learning_rate": 1.204047257844563e-05, + "loss": 0.2891008257865906, + "step": 1875 + }, + { + "epoch": 0.9272210552329173, + "grad_norm": 1.1110723692294957, + "learning_rate": 1.2032473206928663e-05, + "loss": 0.3207235634326935, + "step": 1876 + }, + { + "epoch": 0.9277153095267515, + "grad_norm": 1.203189154519193, + "learning_rate": 1.2024472478561624e-05, + 
"loss": 0.2710658311843872, + "step": 1877 + }, + { + "epoch": 0.9282095638205857, + "grad_norm": 1.1156076578026985, + "learning_rate": 1.2016470398685685e-05, + "loss": 0.2554836869239807, + "step": 1878 + }, + { + "epoch": 0.9287038181144198, + "grad_norm": 1.079454168196498, + "learning_rate": 1.2008466972642921e-05, + "loss": 0.2822943329811096, + "step": 1879 + }, + { + "epoch": 0.929198072408254, + "grad_norm": 1.2007950112208574, + "learning_rate": 1.20004622057763e-05, + "loss": 0.3447754681110382, + "step": 1880 + }, + { + "epoch": 0.9296923267020882, + "grad_norm": 1.1885607345269107, + "learning_rate": 1.1992456103429694e-05, + "loss": 0.3009227514266968, + "step": 1881 + }, + { + "epoch": 0.9301865809959224, + "grad_norm": 1.3491102685763696, + "learning_rate": 1.1984448670947863e-05, + "loss": 0.33154594898223877, + "step": 1882 + }, + { + "epoch": 0.9306808352897565, + "grad_norm": 1.7075348805187878, + "learning_rate": 1.1976439913676457e-05, + "loss": 0.32905343174934387, + "step": 1883 + }, + { + "epoch": 0.9311750895835907, + "grad_norm": 1.2010662669423082, + "learning_rate": 1.1968429836962e-05, + "loss": 0.34757447242736816, + "step": 1884 + }, + { + "epoch": 0.931669343877425, + "grad_norm": 1.2626693752273819, + "learning_rate": 1.1960418446151912e-05, + "loss": 0.29980987310409546, + "step": 1885 + }, + { + "epoch": 0.9321635981712592, + "grad_norm": 1.081439601568963, + "learning_rate": 1.1952405746594477e-05, + "loss": 0.3106808662414551, + "step": 1886 + }, + { + "epoch": 0.9326578524650933, + "grad_norm": 1.2465315131717423, + "learning_rate": 1.1944391743638863e-05, + "loss": 0.3222411572933197, + "step": 1887 + }, + { + "epoch": 0.9331521067589275, + "grad_norm": 1.117897007008322, + "learning_rate": 1.1936376442635104e-05, + "loss": 0.3365646302700043, + "step": 1888 + }, + { + "epoch": 0.9336463610527617, + "grad_norm": 1.2223325106102665, + "learning_rate": 1.1928359848934101e-05, + "loss": 0.32500627636909485, + "step": 1889 + }, 
+ { + "epoch": 0.9341406153465959, + "grad_norm": 1.1692844365001853, + "learning_rate": 1.1920341967887614e-05, + "loss": 0.31395500898361206, + "step": 1890 + }, + { + "epoch": 0.93463486964043, + "grad_norm": 1.084320264091655, + "learning_rate": 1.1912322804848268e-05, + "loss": 0.3060624301433563, + "step": 1891 + }, + { + "epoch": 0.9351291239342642, + "grad_norm": 1.042165685734395, + "learning_rate": 1.190430236516954e-05, + "loss": 0.2644454836845398, + "step": 1892 + }, + { + "epoch": 0.9356233782280984, + "grad_norm": 1.2086818065931575, + "learning_rate": 1.1896280654205765e-05, + "loss": 0.33404678106307983, + "step": 1893 + }, + { + "epoch": 0.9361176325219326, + "grad_norm": 1.0362894963118763, + "learning_rate": 1.1888257677312119e-05, + "loss": 0.28557512164115906, + "step": 1894 + }, + { + "epoch": 0.9366118868157667, + "grad_norm": 1.1281245501630466, + "learning_rate": 1.1880233439844623e-05, + "loss": 0.3332308530807495, + "step": 1895 + }, + { + "epoch": 0.9371061411096009, + "grad_norm": 1.0648316720915905, + "learning_rate": 1.1872207947160155e-05, + "loss": 0.3274528384208679, + "step": 1896 + }, + { + "epoch": 0.9376003954034351, + "grad_norm": 1.168900116977035, + "learning_rate": 1.1864181204616404e-05, + "loss": 0.297880083322525, + "step": 1897 + }, + { + "epoch": 0.9380946496972693, + "grad_norm": 1.1118774536365064, + "learning_rate": 1.1856153217571924e-05, + "loss": 0.3404296040534973, + "step": 1898 + }, + { + "epoch": 0.9385889039911034, + "grad_norm": 1.7308625403608067, + "learning_rate": 1.1848123991386073e-05, + "loss": 0.32343849539756775, + "step": 1899 + }, + { + "epoch": 0.9390831582849376, + "grad_norm": 1.0296882466024648, + "learning_rate": 1.1840093531419052e-05, + "loss": 0.26679158210754395, + "step": 1900 + }, + { + "epoch": 0.9395774125787718, + "grad_norm": 1.0019153721179144, + "learning_rate": 1.1832061843031884e-05, + "loss": 0.28106996417045593, + "step": 1901 + }, + { + "epoch": 0.9400716668726059, + 
"grad_norm": 1.1236721425678955, + "learning_rate": 1.1824028931586406e-05, + "loss": 0.28356847167015076, + "step": 1902 + }, + { + "epoch": 0.9405659211664401, + "grad_norm": 1.2443758247501144, + "learning_rate": 1.1815994802445274e-05, + "loss": 0.3256348669528961, + "step": 1903 + }, + { + "epoch": 0.9410601754602743, + "grad_norm": 1.1514727386744015, + "learning_rate": 1.1807959460971958e-05, + "loss": 0.2781906723976135, + "step": 1904 + }, + { + "epoch": 0.9415544297541085, + "grad_norm": 1.2599759308188183, + "learning_rate": 1.1799922912530741e-05, + "loss": 0.3129916787147522, + "step": 1905 + }, + { + "epoch": 0.9420486840479426, + "grad_norm": 1.1353254220103308, + "learning_rate": 1.1791885162486705e-05, + "loss": 0.281986266374588, + "step": 1906 + }, + { + "epoch": 0.9425429383417768, + "grad_norm": 1.2313275172087987, + "learning_rate": 1.1783846216205734e-05, + "loss": 0.33587342500686646, + "step": 1907 + }, + { + "epoch": 0.943037192635611, + "grad_norm": 1.0408682927660702, + "learning_rate": 1.1775806079054522e-05, + "loss": 0.27715635299682617, + "step": 1908 + }, + { + "epoch": 0.9435314469294452, + "grad_norm": 1.1581221243071849, + "learning_rate": 1.1767764756400541e-05, + "loss": 0.3190307915210724, + "step": 1909 + }, + { + "epoch": 0.9440257012232793, + "grad_norm": 1.1962319364965919, + "learning_rate": 1.175972225361207e-05, + "loss": 0.29336807131767273, + "step": 1910 + }, + { + "epoch": 0.9445199555171135, + "grad_norm": 1.1448708364637925, + "learning_rate": 1.1751678576058164e-05, + "loss": 0.3001596927642822, + "step": 1911 + }, + { + "epoch": 0.9450142098109477, + "grad_norm": 1.0832545536390727, + "learning_rate": 1.1743633729108672e-05, + "loss": 0.26952457427978516, + "step": 1912 + }, + { + "epoch": 0.945508464104782, + "grad_norm": 1.166519142960908, + "learning_rate": 1.1735587718134212e-05, + "loss": 0.3193609118461609, + "step": 1913 + }, + { + "epoch": 0.946002718398616, + "grad_norm": 1.2095746348772163, + 
"learning_rate": 1.172754054850619e-05, + "loss": 0.2810664176940918, + "step": 1914 + }, + { + "epoch": 0.9464969726924503, + "grad_norm": 1.1743627712454017, + "learning_rate": 1.1719492225596783e-05, + "loss": 0.28850311040878296, + "step": 1915 + }, + { + "epoch": 0.9469912269862845, + "grad_norm": 1.1739524489187587, + "learning_rate": 1.1711442754778936e-05, + "loss": 0.32268932461738586, + "step": 1916 + }, + { + "epoch": 0.9474854812801187, + "grad_norm": 1.2236575262685914, + "learning_rate": 1.1703392141426356e-05, + "loss": 0.3149149715900421, + "step": 1917 + }, + { + "epoch": 0.9479797355739528, + "grad_norm": 1.0472038436966378, + "learning_rate": 1.1695340390913526e-05, + "loss": 0.2537482678890228, + "step": 1918 + }, + { + "epoch": 0.948473989867787, + "grad_norm": 1.1232208833213926, + "learning_rate": 1.168728750861567e-05, + "loss": 0.2611936330795288, + "step": 1919 + }, + { + "epoch": 0.9489682441616212, + "grad_norm": 1.0077623948815433, + "learning_rate": 1.1679233499908781e-05, + "loss": 0.263653427362442, + "step": 1920 + }, + { + "epoch": 0.9494624984554554, + "grad_norm": 1.1707561168968341, + "learning_rate": 1.1671178370169604e-05, + "loss": 0.3122594952583313, + "step": 1921 + }, + { + "epoch": 0.9499567527492895, + "grad_norm": 1.1924449722361925, + "learning_rate": 1.1663122124775626e-05, + "loss": 0.3101043701171875, + "step": 1922 + }, + { + "epoch": 0.9504510070431237, + "grad_norm": 1.129901320884474, + "learning_rate": 1.1655064769105077e-05, + "loss": 0.295572966337204, + "step": 1923 + }, + { + "epoch": 0.9509452613369579, + "grad_norm": 1.1537509505815167, + "learning_rate": 1.1647006308536937e-05, + "loss": 0.29732125997543335, + "step": 1924 + }, + { + "epoch": 0.951439515630792, + "grad_norm": 1.1914038253365087, + "learning_rate": 1.1638946748450922e-05, + "loss": 0.32320737838745117, + "step": 1925 + }, + { + "epoch": 0.9519337699246262, + "grad_norm": 1.2581984463314084, + "learning_rate": 1.1630886094227471e-05, + 
"loss": 0.3306753933429718, + "step": 1926 + }, + { + "epoch": 0.9524280242184604, + "grad_norm": 1.0367245477692144, + "learning_rate": 1.1622824351247767e-05, + "loss": 0.2368355095386505, + "step": 1927 + }, + { + "epoch": 0.9529222785122946, + "grad_norm": 1.2216253394681036, + "learning_rate": 1.1614761524893715e-05, + "loss": 0.28470784425735474, + "step": 1928 + }, + { + "epoch": 0.9534165328061287, + "grad_norm": 1.1721810384499396, + "learning_rate": 1.160669762054794e-05, + "loss": 0.34468895196914673, + "step": 1929 + }, + { + "epoch": 0.9539107870999629, + "grad_norm": 1.1277795177992218, + "learning_rate": 1.1598632643593787e-05, + "loss": 0.30562442541122437, + "step": 1930 + }, + { + "epoch": 0.9544050413937971, + "grad_norm": 1.2141650113141733, + "learning_rate": 1.159056659941533e-05, + "loss": 0.2861478924751282, + "step": 1931 + }, + { + "epoch": 0.9548992956876313, + "grad_norm": 1.0692532214940453, + "learning_rate": 1.1582499493397332e-05, + "loss": 0.32385969161987305, + "step": 1932 + }, + { + "epoch": 0.9553935499814654, + "grad_norm": 1.173323189937386, + "learning_rate": 1.1574431330925287e-05, + "loss": 0.2935449481010437, + "step": 1933 + }, + { + "epoch": 0.9558878042752996, + "grad_norm": 1.1041433205065538, + "learning_rate": 1.156636211738538e-05, + "loss": 0.29380083084106445, + "step": 1934 + }, + { + "epoch": 0.9563820585691338, + "grad_norm": 1.1455066452691371, + "learning_rate": 1.1558291858164503e-05, + "loss": 0.2957204282283783, + "step": 1935 + }, + { + "epoch": 0.956876312862968, + "grad_norm": 1.084977751415868, + "learning_rate": 1.1550220558650246e-05, + "loss": 0.26402851939201355, + "step": 1936 + }, + { + "epoch": 0.9573705671568021, + "grad_norm": 1.1085858464768976, + "learning_rate": 1.1542148224230897e-05, + "loss": 0.29163527488708496, + "step": 1937 + }, + { + "epoch": 0.9578648214506363, + "grad_norm": 1.2120558942254267, + "learning_rate": 1.1534074860295426e-05, + "loss": 0.302470326423645, + "step": 1938 
+ }, + { + "epoch": 0.9583590757444705, + "grad_norm": 1.1861857419569999, + "learning_rate": 1.15260004722335e-05, + "loss": 0.25946593284606934, + "step": 1939 + }, + { + "epoch": 0.9588533300383048, + "grad_norm": 1.1153985574382288, + "learning_rate": 1.1517925065435457e-05, + "loss": 0.2680559456348419, + "step": 1940 + }, + { + "epoch": 0.9593475843321388, + "grad_norm": 1.2104349484077064, + "learning_rate": 1.1509848645292334e-05, + "loss": 0.2684473991394043, + "step": 1941 + }, + { + "epoch": 0.959841838625973, + "grad_norm": 1.245187124369965, + "learning_rate": 1.1501771217195827e-05, + "loss": 0.2795519232749939, + "step": 1942 + }, + { + "epoch": 0.9603360929198073, + "grad_norm": 1.2532047895072767, + "learning_rate": 1.1493692786538313e-05, + "loss": 0.35209575295448303, + "step": 1943 + }, + { + "epoch": 0.9608303472136415, + "grad_norm": 1.176019791514668, + "learning_rate": 1.1485613358712839e-05, + "loss": 0.3058928847312927, + "step": 1944 + }, + { + "epoch": 0.9613246015074756, + "grad_norm": 1.103375830615649, + "learning_rate": 1.1477532939113112e-05, + "loss": 0.2889159619808197, + "step": 1945 + }, + { + "epoch": 0.9618188558013098, + "grad_norm": 1.175759039350938, + "learning_rate": 1.1469451533133506e-05, + "loss": 0.30782538652420044, + "step": 1946 + }, + { + "epoch": 0.962313110095144, + "grad_norm": 1.1326992133409532, + "learning_rate": 1.1461369146169052e-05, + "loss": 0.3091726005077362, + "step": 1947 + }, + { + "epoch": 0.9628073643889782, + "grad_norm": 1.2061917553730328, + "learning_rate": 1.1453285783615438e-05, + "loss": 0.3287050724029541, + "step": 1948 + }, + { + "epoch": 0.9633016186828123, + "grad_norm": 1.1941959404182023, + "learning_rate": 1.1445201450868998e-05, + "loss": 0.31267625093460083, + "step": 1949 + }, + { + "epoch": 0.9637958729766465, + "grad_norm": 1.1346278168962094, + "learning_rate": 1.1437116153326719e-05, + "loss": 0.30775952339172363, + "step": 1950 + }, + { + "epoch": 0.9642901272704807, + 
"grad_norm": 1.292541938462464, + "learning_rate": 1.142902989638623e-05, + "loss": 0.3825497329235077, + "step": 1951 + }, + { + "epoch": 0.9647843815643148, + "grad_norm": 1.0454710330230295, + "learning_rate": 1.1420942685445801e-05, + "loss": 0.2866062521934509, + "step": 1952 + }, + { + "epoch": 0.965278635858149, + "grad_norm": 1.144633580750803, + "learning_rate": 1.1412854525904335e-05, + "loss": 0.27787062525749207, + "step": 1953 + }, + { + "epoch": 0.9657728901519832, + "grad_norm": 1.1290436448297894, + "learning_rate": 1.1404765423161381e-05, + "loss": 0.302572101354599, + "step": 1954 + }, + { + "epoch": 0.9662671444458174, + "grad_norm": 1.0781086639824042, + "learning_rate": 1.1396675382617097e-05, + "loss": 0.29608359932899475, + "step": 1955 + }, + { + "epoch": 0.9667613987396515, + "grad_norm": 1.1646658995895742, + "learning_rate": 1.1388584409672285e-05, + "loss": 0.28057801723480225, + "step": 1956 + }, + { + "epoch": 0.9672556530334857, + "grad_norm": 1.1188617227766138, + "learning_rate": 1.1380492509728363e-05, + "loss": 0.29628869891166687, + "step": 1957 + }, + { + "epoch": 0.9677499073273199, + "grad_norm": 1.1207660926511307, + "learning_rate": 1.1372399688187365e-05, + "loss": 0.29254984855651855, + "step": 1958 + }, + { + "epoch": 0.9682441616211541, + "grad_norm": 1.10665523309967, + "learning_rate": 1.1364305950451946e-05, + "loss": 0.32925280928611755, + "step": 1959 + }, + { + "epoch": 0.9687384159149882, + "grad_norm": 1.108029328920716, + "learning_rate": 1.1356211301925367e-05, + "loss": 0.3072258234024048, + "step": 1960 + }, + { + "epoch": 0.9692326702088224, + "grad_norm": 1.1133536367191044, + "learning_rate": 1.1348115748011499e-05, + "loss": 0.29737845063209534, + "step": 1961 + }, + { + "epoch": 0.9697269245026566, + "grad_norm": 1.1169451234105505, + "learning_rate": 1.1340019294114822e-05, + "loss": 0.27369949221611023, + "step": 1962 + }, + { + "epoch": 0.9702211787964908, + "grad_norm": 1.2861478922811351, + 
"learning_rate": 1.1331921945640408e-05, + "loss": 0.33116602897644043, + "step": 1963 + }, + { + "epoch": 0.9707154330903249, + "grad_norm": 1.9398235156973715, + "learning_rate": 1.1323823707993937e-05, + "loss": 0.2620438039302826, + "step": 1964 + }, + { + "epoch": 0.9712096873841591, + "grad_norm": 1.1505189829247824, + "learning_rate": 1.1315724586581673e-05, + "loss": 0.3187680244445801, + "step": 1965 + }, + { + "epoch": 0.9717039416779933, + "grad_norm": 1.2391813787863328, + "learning_rate": 1.1307624586810472e-05, + "loss": 0.3675233721733093, + "step": 1966 + }, + { + "epoch": 0.9721981959718276, + "grad_norm": 1.2521490817049854, + "learning_rate": 1.1299523714087784e-05, + "loss": 0.31064945459365845, + "step": 1967 + }, + { + "epoch": 0.9726924502656616, + "grad_norm": 1.1166975993354054, + "learning_rate": 1.1291421973821632e-05, + "loss": 0.2941773235797882, + "step": 1968 + }, + { + "epoch": 0.9731867045594959, + "grad_norm": 1.2565504643296834, + "learning_rate": 1.128331937142062e-05, + "loss": 0.3443846106529236, + "step": 1969 + }, + { + "epoch": 0.9736809588533301, + "grad_norm": 1.1142268279429304, + "learning_rate": 1.1275215912293933e-05, + "loss": 0.2815151810646057, + "step": 1970 + }, + { + "epoch": 0.9741752131471643, + "grad_norm": 1.1622346059327586, + "learning_rate": 1.1267111601851327e-05, + "loss": 0.2886476516723633, + "step": 1971 + }, + { + "epoch": 0.9746694674409984, + "grad_norm": 1.0942194208380682, + "learning_rate": 1.1259006445503116e-05, + "loss": 0.2692835330963135, + "step": 1972 + }, + { + "epoch": 0.9751637217348326, + "grad_norm": 1.1112683317978183, + "learning_rate": 1.1250900448660192e-05, + "loss": 0.2748587727546692, + "step": 1973 + }, + { + "epoch": 0.9756579760286668, + "grad_norm": 1.192989589829818, + "learning_rate": 1.1242793616734002e-05, + "loss": 0.2963098883628845, + "step": 1974 + }, + { + "epoch": 0.976152230322501, + "grad_norm": 1.1305326657315258, + "learning_rate": 1.1234685955136552e-05, + 
"loss": 0.28353193402290344, + "step": 1975 + }, + { + "epoch": 0.9766464846163351, + "grad_norm": 1.1967273051238179, + "learning_rate": 1.1226577469280397e-05, + "loss": 0.3308493494987488, + "step": 1976 + }, + { + "epoch": 0.9771407389101693, + "grad_norm": 1.096933031801606, + "learning_rate": 1.1218468164578653e-05, + "loss": 0.26923754811286926, + "step": 1977 + }, + { + "epoch": 0.9776349932040035, + "grad_norm": 1.5091635403311783, + "learning_rate": 1.1210358046444968e-05, + "loss": 0.2730574905872345, + "step": 1978 + }, + { + "epoch": 0.9781292474978376, + "grad_norm": 1.1338996219219686, + "learning_rate": 1.1202247120293548e-05, + "loss": 0.26464858651161194, + "step": 1979 + }, + { + "epoch": 0.9786235017916718, + "grad_norm": 1.2694994457222093, + "learning_rate": 1.1194135391539127e-05, + "loss": 0.30095499753952026, + "step": 1980 + }, + { + "epoch": 0.979117756085506, + "grad_norm": 1.3227283597348862, + "learning_rate": 1.1186022865596983e-05, + "loss": 0.3418167233467102, + "step": 1981 + }, + { + "epoch": 0.9796120103793402, + "grad_norm": 1.2780598996117225, + "learning_rate": 1.117790954788292e-05, + "loss": 0.28735262155532837, + "step": 1982 + }, + { + "epoch": 0.9801062646731743, + "grad_norm": 1.109707631385258, + "learning_rate": 1.116979544381327e-05, + "loss": 0.26816800236701965, + "step": 1983 + }, + { + "epoch": 0.9806005189670085, + "grad_norm": 1.1873089360962268, + "learning_rate": 1.1161680558804897e-05, + "loss": 0.31004661321640015, + "step": 1984 + }, + { + "epoch": 0.9810947732608427, + "grad_norm": 1.2669673078204273, + "learning_rate": 1.1153564898275184e-05, + "loss": 0.33103084564208984, + "step": 1985 + }, + { + "epoch": 0.9815890275546769, + "grad_norm": 1.3375894512262838, + "learning_rate": 1.1145448467642021e-05, + "loss": 0.3804841637611389, + "step": 1986 + }, + { + "epoch": 0.982083281848511, + "grad_norm": 1.2029739003434823, + "learning_rate": 1.1137331272323834e-05, + "loss": 0.31861352920532227, + "step": 
1987 + }, + { + "epoch": 0.9825775361423452, + "grad_norm": 1.1954996526655464, + "learning_rate": 1.1129213317739539e-05, + "loss": 0.3022298216819763, + "step": 1988 + }, + { + "epoch": 0.9830717904361794, + "grad_norm": 1.3466664334904774, + "learning_rate": 1.1121094609308564e-05, + "loss": 0.38203683495521545, + "step": 1989 + }, + { + "epoch": 0.9835660447300136, + "grad_norm": 1.215882197519198, + "learning_rate": 1.1112975152450848e-05, + "loss": 0.3105717897415161, + "step": 1990 + }, + { + "epoch": 0.9840602990238477, + "grad_norm": 1.2066484647947713, + "learning_rate": 1.1104854952586827e-05, + "loss": 0.31930285692214966, + "step": 1991 + }, + { + "epoch": 0.9845545533176819, + "grad_norm": 1.1639723195264664, + "learning_rate": 1.1096734015137422e-05, + "loss": 0.3167966902256012, + "step": 1992 + }, + { + "epoch": 0.9850488076115161, + "grad_norm": 1.168704133231974, + "learning_rate": 1.1088612345524059e-05, + "loss": 0.2693050801753998, + "step": 1993 + }, + { + "epoch": 0.9855430619053503, + "grad_norm": 1.0985586655404702, + "learning_rate": 1.1080489949168651e-05, + "loss": 0.27986466884613037, + "step": 1994 + }, + { + "epoch": 0.9860373161991844, + "grad_norm": 1.1481757517161775, + "learning_rate": 1.1072366831493589e-05, + "loss": 0.26814526319503784, + "step": 1995 + }, + { + "epoch": 0.9865315704930187, + "grad_norm": 1.146921609246337, + "learning_rate": 1.1064242997921753e-05, + "loss": 0.31393951177597046, + "step": 1996 + }, + { + "epoch": 0.9870258247868529, + "grad_norm": 1.1375630444026625, + "learning_rate": 1.1056118453876496e-05, + "loss": 0.2958461344242096, + "step": 1997 + }, + { + "epoch": 0.9875200790806871, + "grad_norm": 1.137037421352785, + "learning_rate": 1.1047993204781652e-05, + "loss": 0.29744619131088257, + "step": 1998 + }, + { + "epoch": 0.9880143333745212, + "grad_norm": 1.1508003551512254, + "learning_rate": 1.1039867256061516e-05, + "loss": 0.29055094718933105, + "step": 1999 + }, + { + "epoch": 
0.9885085876683554, + "grad_norm": 1.1632161121950038, + "learning_rate": 1.103174061314086e-05, + "loss": 0.29961663484573364, + "step": 2000 + }, + { + "epoch": 0.9890028419621896, + "grad_norm": 1.0841825843818378, + "learning_rate": 1.102361328144491e-05, + "loss": 0.34533610939979553, + "step": 2001 + }, + { + "epoch": 0.9894970962560238, + "grad_norm": 1.1849596678411713, + "learning_rate": 1.1015485266399362e-05, + "loss": 0.2994460463523865, + "step": 2002 + }, + { + "epoch": 0.9899913505498579, + "grad_norm": 1.2325420364808024, + "learning_rate": 1.1007356573430357e-05, + "loss": 0.34309566020965576, + "step": 2003 + }, + { + "epoch": 0.9904856048436921, + "grad_norm": 1.2050309252665437, + "learning_rate": 1.09992272079645e-05, + "loss": 0.3049868643283844, + "step": 2004 + }, + { + "epoch": 0.9909798591375263, + "grad_norm": 1.1759703775328856, + "learning_rate": 1.0991097175428833e-05, + "loss": 0.30586326122283936, + "step": 2005 + }, + { + "epoch": 0.9914741134313604, + "grad_norm": 1.1997965130034223, + "learning_rate": 1.0982966481250854e-05, + "loss": 0.29740482568740845, + "step": 2006 + }, + { + "epoch": 0.9919683677251946, + "grad_norm": 1.2400023524315222, + "learning_rate": 1.0974835130858497e-05, + "loss": 0.3218206465244293, + "step": 2007 + }, + { + "epoch": 0.9924626220190288, + "grad_norm": 1.1309419286206777, + "learning_rate": 1.0966703129680139e-05, + "loss": 0.2747582495212555, + "step": 2008 + }, + { + "epoch": 0.992956876312863, + "grad_norm": 1.2581670135770728, + "learning_rate": 1.0958570483144578e-05, + "loss": 0.33215245604515076, + "step": 2009 + }, + { + "epoch": 0.9934511306066971, + "grad_norm": 1.2834058413633842, + "learning_rate": 1.0950437196681061e-05, + "loss": 0.3149756193161011, + "step": 2010 + }, + { + "epoch": 0.9939453849005313, + "grad_norm": 1.1001136330607295, + "learning_rate": 1.0942303275719253e-05, + "loss": 0.2763513922691345, + "step": 2011 + }, + { + "epoch": 0.9944396391943655, + "grad_norm": 
1.0592905887432897, + "learning_rate": 1.0934168725689239e-05, + "loss": 0.2818325161933899, + "step": 2012 + }, + { + "epoch": 0.9949338934881997, + "grad_norm": 1.1079515754649163, + "learning_rate": 1.0926033552021533e-05, + "loss": 0.2659858167171478, + "step": 2013 + }, + { + "epoch": 0.9954281477820338, + "grad_norm": 1.1926210163358253, + "learning_rate": 1.091789776014706e-05, + "loss": 0.30891451239585876, + "step": 2014 + }, + { + "epoch": 0.995922402075868, + "grad_norm": 1.2194298136031743, + "learning_rate": 1.0909761355497156e-05, + "loss": 0.33645111322402954, + "step": 2015 + }, + { + "epoch": 0.9964166563697022, + "grad_norm": 1.1110546475920504, + "learning_rate": 1.0901624343503571e-05, + "loss": 0.3086194097995758, + "step": 2016 + }, + { + "epoch": 0.9969109106635364, + "grad_norm": 1.0167201052564092, + "learning_rate": 1.089348672959846e-05, + "loss": 0.2614179253578186, + "step": 2017 + }, + { + "epoch": 0.9974051649573705, + "grad_norm": 1.2224853324284848, + "learning_rate": 1.088534851921437e-05, + "loss": 0.3300556540489197, + "step": 2018 + }, + { + "epoch": 0.9978994192512047, + "grad_norm": 1.1929848499106601, + "learning_rate": 1.087720971778426e-05, + "loss": 0.28443643450737, + "step": 2019 + }, + { + "epoch": 0.9983936735450389, + "grad_norm": 1.052677422924197, + "learning_rate": 1.0869070330741475e-05, + "loss": 0.2805534601211548, + "step": 2020 + }, + { + "epoch": 0.9988879278388731, + "grad_norm": 1.065568553175956, + "learning_rate": 1.0860930363519758e-05, + "loss": 0.28186699748039246, + "step": 2021 + }, + { + "epoch": 0.9993821821327072, + "grad_norm": 1.2171160812601536, + "learning_rate": 1.0852789821553228e-05, + "loss": 0.3527688980102539, + "step": 2022 + }, + { + "epoch": 0.9998764364265414, + "grad_norm": 1.2020406854373213, + "learning_rate": 1.08446487102764e-05, + "loss": 0.30708247423171997, + "step": 2023 + }, + { + "epoch": 1.0, + "grad_norm": 2.286184440614986, + "learning_rate": 1.083650703512416e-05, + 
"loss": 0.3015655279159546, + "step": 2024 + }, + { + "epoch": 1.0004942542938342, + "grad_norm": 1.2067651750081223, + "learning_rate": 1.0828364801531777e-05, + "loss": 0.29792484641075134, + "step": 2025 + }, + { + "epoch": 1.0009885085876684, + "grad_norm": 1.1529758757862274, + "learning_rate": 1.0820222014934887e-05, + "loss": 0.27995994687080383, + "step": 2026 + }, + { + "epoch": 1.0014827628815026, + "grad_norm": 1.115022133563525, + "learning_rate": 1.0812078680769501e-05, + "loss": 0.25797444581985474, + "step": 2027 + }, + { + "epoch": 1.0019770171753366, + "grad_norm": 1.1202805963305373, + "learning_rate": 1.0803934804471991e-05, + "loss": 0.2834373116493225, + "step": 2028 + }, + { + "epoch": 1.0024712714691708, + "grad_norm": 1.147731866533824, + "learning_rate": 1.079579039147909e-05, + "loss": 0.27055832743644714, + "step": 2029 + }, + { + "epoch": 1.002965525763005, + "grad_norm": 1.1916483552600579, + "learning_rate": 1.0787645447227897e-05, + "loss": 0.30029311776161194, + "step": 2030 + }, + { + "epoch": 1.0034597800568392, + "grad_norm": 1.1834514894044206, + "learning_rate": 1.0779499977155858e-05, + "loss": 0.2741442322731018, + "step": 2031 + }, + { + "epoch": 1.0039540343506734, + "grad_norm": 1.1233171341295944, + "learning_rate": 1.0771353986700767e-05, + "loss": 0.27097994089126587, + "step": 2032 + }, + { + "epoch": 1.0044482886445076, + "grad_norm": 1.1267943347727831, + "learning_rate": 1.0763207481300781e-05, + "loss": 0.2690125107765198, + "step": 2033 + }, + { + "epoch": 1.0049425429383418, + "grad_norm": 1.1312636860673373, + "learning_rate": 1.0755060466394383e-05, + "loss": 0.29656079411506653, + "step": 2034 + }, + { + "epoch": 1.005436797232176, + "grad_norm": 1.1729529368370135, + "learning_rate": 1.0746912947420407e-05, + "loss": 0.25291675329208374, + "step": 2035 + }, + { + "epoch": 1.00593105152601, + "grad_norm": 1.410951786073956, + "learning_rate": 1.0738764929818017e-05, + "loss": 0.26391562819480896, + "step": 2036 
+ }, + { + "epoch": 1.0064253058198442, + "grad_norm": 1.258204498994485, + "learning_rate": 1.073061641902672e-05, + "loss": 0.2850308418273926, + "step": 2037 + }, + { + "epoch": 1.0069195601136784, + "grad_norm": 1.1368887973206072, + "learning_rate": 1.0722467420486338e-05, + "loss": 0.2529013454914093, + "step": 2038 + }, + { + "epoch": 1.0074138144075127, + "grad_norm": 1.2420233139292696, + "learning_rate": 1.0714317939637028e-05, + "loss": 0.2577154040336609, + "step": 2039 + }, + { + "epoch": 1.0079080687013469, + "grad_norm": 1.1996492314644527, + "learning_rate": 1.0706167981919269e-05, + "loss": 0.28677526116371155, + "step": 2040 + }, + { + "epoch": 1.008402322995181, + "grad_norm": 1.210233649974949, + "learning_rate": 1.0698017552773859e-05, + "loss": 0.25146183371543884, + "step": 2041 + }, + { + "epoch": 1.0088965772890153, + "grad_norm": 1.217205041102825, + "learning_rate": 1.0689866657641899e-05, + "loss": 0.29958251118659973, + "step": 2042 + }, + { + "epoch": 1.0093908315828495, + "grad_norm": 1.2422486891064726, + "learning_rate": 1.0681715301964817e-05, + "loss": 0.28512266278266907, + "step": 2043 + }, + { + "epoch": 1.0098850858766835, + "grad_norm": 1.3312817373132209, + "learning_rate": 1.067356349118434e-05, + "loss": 0.29768145084381104, + "step": 2044 + }, + { + "epoch": 1.0103793401705177, + "grad_norm": 1.2397312600868813, + "learning_rate": 1.0665411230742498e-05, + "loss": 0.25144103169441223, + "step": 2045 + }, + { + "epoch": 1.0108735944643519, + "grad_norm": 1.6026936131359757, + "learning_rate": 1.0657258526081629e-05, + "loss": 0.2673259973526001, + "step": 2046 + }, + { + "epoch": 1.011367848758186, + "grad_norm": 1.2940971813114743, + "learning_rate": 1.0649105382644359e-05, + "loss": 0.2845848500728607, + "step": 2047 + }, + { + "epoch": 1.0118621030520203, + "grad_norm": 1.0898574113835153, + "learning_rate": 1.0640951805873607e-05, + "loss": 0.2569392919540405, + "step": 2048 + }, + { + "epoch": 1.0123563573458545, + 
"grad_norm": 1.2632947550014098, + "learning_rate": 1.0632797801212591e-05, + "loss": 0.250387966632843, + "step": 2049 + }, + { + "epoch": 1.0128506116396887, + "grad_norm": 1.233630096360243, + "learning_rate": 1.0624643374104804e-05, + "loss": 0.28228282928466797, + "step": 2050 + }, + { + "epoch": 1.0133448659335227, + "grad_norm": 1.0888042979148498, + "learning_rate": 1.0616488529994024e-05, + "loss": 0.24724754691123962, + "step": 2051 + }, + { + "epoch": 1.013839120227357, + "grad_norm": 1.2576287774069197, + "learning_rate": 1.0608333274324312e-05, + "loss": 0.268532395362854, + "step": 2052 + }, + { + "epoch": 1.014333374521191, + "grad_norm": 1.1578525571147846, + "learning_rate": 1.0600177612539995e-05, + "loss": 0.27454662322998047, + "step": 2053 + }, + { + "epoch": 1.0148276288150253, + "grad_norm": 1.2050116136682636, + "learning_rate": 1.0592021550085683e-05, + "loss": 0.27497538924217224, + "step": 2054 + }, + { + "epoch": 1.0153218831088595, + "grad_norm": 1.1358282649300115, + "learning_rate": 1.0583865092406237e-05, + "loss": 0.24480152130126953, + "step": 2055 + }, + { + "epoch": 1.0158161374026937, + "grad_norm": 1.1352545460867702, + "learning_rate": 1.0575708244946805e-05, + "loss": 0.23754069209098816, + "step": 2056 + }, + { + "epoch": 1.016310391696528, + "grad_norm": 1.150720407382798, + "learning_rate": 1.056755101315277e-05, + "loss": 0.24541275203227997, + "step": 2057 + }, + { + "epoch": 1.0168046459903621, + "grad_norm": 1.2022551315194179, + "learning_rate": 1.055939340246979e-05, + "loss": 0.27724504470825195, + "step": 2058 + }, + { + "epoch": 1.0172989002841961, + "grad_norm": 1.2400168112160508, + "learning_rate": 1.0551235418343766e-05, + "loss": 0.2869918942451477, + "step": 2059 + }, + { + "epoch": 1.0177931545780303, + "grad_norm": 1.2299839323583324, + "learning_rate": 1.0543077066220854e-05, + "loss": 0.27153679728507996, + "step": 2060 + }, + { + "epoch": 1.0182874088718645, + "grad_norm": 1.1366017541860491, + 
"learning_rate": 1.0534918351547454e-05, + "loss": 0.2611347436904907, + "step": 2061 + }, + { + "epoch": 1.0187816631656987, + "grad_norm": 1.1317421431613228, + "learning_rate": 1.0526759279770202e-05, + "loss": 0.26649200916290283, + "step": 2062 + }, + { + "epoch": 1.019275917459533, + "grad_norm": 1.0930466767865903, + "learning_rate": 1.0518599856335983e-05, + "loss": 0.25164204835891724, + "step": 2063 + }, + { + "epoch": 1.0197701717533671, + "grad_norm": 1.2027289451385044, + "learning_rate": 1.0510440086691911e-05, + "loss": 0.288251131772995, + "step": 2064 + }, + { + "epoch": 1.0202644260472014, + "grad_norm": 1.2837951062377317, + "learning_rate": 1.0502279976285325e-05, + "loss": 0.27177444100379944, + "step": 2065 + }, + { + "epoch": 1.0207586803410356, + "grad_norm": 1.222948820556725, + "learning_rate": 1.0494119530563812e-05, + "loss": 0.2723502218723297, + "step": 2066 + }, + { + "epoch": 1.0212529346348695, + "grad_norm": 1.214398839170698, + "learning_rate": 1.0485958754975156e-05, + "loss": 0.2704971432685852, + "step": 2067 + }, + { + "epoch": 1.0217471889287038, + "grad_norm": 1.267114179641731, + "learning_rate": 1.0477797654967376e-05, + "loss": 0.30302050709724426, + "step": 2068 + }, + { + "epoch": 1.022241443222538, + "grad_norm": 1.268227752862744, + "learning_rate": 1.0469636235988711e-05, + "loss": 0.26408523321151733, + "step": 2069 + }, + { + "epoch": 1.0227356975163722, + "grad_norm": 1.2197627847133865, + "learning_rate": 1.0461474503487606e-05, + "loss": 0.2691786289215088, + "step": 2070 + }, + { + "epoch": 1.0232299518102064, + "grad_norm": 1.2792531550605064, + "learning_rate": 1.0453312462912714e-05, + "loss": 0.2823137640953064, + "step": 2071 + }, + { + "epoch": 1.0237242061040406, + "grad_norm": 1.2027503273852609, + "learning_rate": 1.04451501197129e-05, + "loss": 0.28837013244628906, + "step": 2072 + }, + { + "epoch": 1.0242184603978748, + "grad_norm": 1.27109994402604, + "learning_rate": 1.0436987479337229e-05, + 
"loss": 0.2809562683105469, + "step": 2073 + }, + { + "epoch": 1.024712714691709, + "grad_norm": 1.240431430170138, + "learning_rate": 1.0428824547234956e-05, + "loss": 0.2604525685310364, + "step": 2074 + }, + { + "epoch": 1.025206968985543, + "grad_norm": 1.1799966275921325, + "learning_rate": 1.0420661328855546e-05, + "loss": 0.24755606055259705, + "step": 2075 + }, + { + "epoch": 1.0257012232793772, + "grad_norm": 1.148092531592558, + "learning_rate": 1.0412497829648642e-05, + "loss": 0.2592730224132538, + "step": 2076 + }, + { + "epoch": 1.0261954775732114, + "grad_norm": 1.2356689091758393, + "learning_rate": 1.0404334055064083e-05, + "loss": 0.2693594694137573, + "step": 2077 + }, + { + "epoch": 1.0266897318670456, + "grad_norm": 1.2195187999450414, + "learning_rate": 1.0396170010551881e-05, + "loss": 0.2712753117084503, + "step": 2078 + }, + { + "epoch": 1.0271839861608798, + "grad_norm": 1.1741285828383992, + "learning_rate": 1.0388005701562245e-05, + "loss": 0.2693077027797699, + "step": 2079 + }, + { + "epoch": 1.027678240454714, + "grad_norm": 1.2670826968894364, + "learning_rate": 1.0379841133545544e-05, + "loss": 0.2791144847869873, + "step": 2080 + }, + { + "epoch": 1.0281724947485482, + "grad_norm": 1.163594554813514, + "learning_rate": 1.037167631195233e-05, + "loss": 0.27496254444122314, + "step": 2081 + }, + { + "epoch": 1.0286667490423822, + "grad_norm": 1.1305894692188725, + "learning_rate": 1.0363511242233322e-05, + "loss": 0.26037347316741943, + "step": 2082 + }, + { + "epoch": 1.0291610033362164, + "grad_norm": 1.2085934995349474, + "learning_rate": 1.0355345929839402e-05, + "loss": 0.2610514760017395, + "step": 2083 + }, + { + "epoch": 1.0296552576300506, + "grad_norm": 1.1531883738354434, + "learning_rate": 1.0347180380221618e-05, + "loss": 0.24750857055187225, + "step": 2084 + }, + { + "epoch": 1.0301495119238848, + "grad_norm": 1.2017075670935908, + "learning_rate": 1.0339014598831169e-05, + "loss": 0.2835415303707123, + "step": 2085 + 
}, + { + "epoch": 1.030643766217719, + "grad_norm": 1.2153811049556569, + "learning_rate": 1.033084859111942e-05, + "loss": 0.25762057304382324, + "step": 2086 + }, + { + "epoch": 1.0311380205115532, + "grad_norm": 1.3245241554987517, + "learning_rate": 1.032268236253788e-05, + "loss": 0.2818237841129303, + "step": 2087 + }, + { + "epoch": 1.0316322748053874, + "grad_norm": 1.2402911628462394, + "learning_rate": 1.0314515918538202e-05, + "loss": 0.27192944288253784, + "step": 2088 + }, + { + "epoch": 1.0321265290992216, + "grad_norm": 1.1715597954552734, + "learning_rate": 1.0306349264572195e-05, + "loss": 0.3002319931983948, + "step": 2089 + }, + { + "epoch": 1.0326207833930556, + "grad_norm": 1.221598051409306, + "learning_rate": 1.0298182406091794e-05, + "loss": 0.27106401324272156, + "step": 2090 + }, + { + "epoch": 1.0331150376868898, + "grad_norm": 1.2123644146814079, + "learning_rate": 1.0290015348549076e-05, + "loss": 0.2740558385848999, + "step": 2091 + }, + { + "epoch": 1.033609291980724, + "grad_norm": 1.2394453454529126, + "learning_rate": 1.0281848097396261e-05, + "loss": 0.2970008850097656, + "step": 2092 + }, + { + "epoch": 1.0341035462745582, + "grad_norm": 1.2003549808286662, + "learning_rate": 1.027368065808568e-05, + "loss": 0.27684125304222107, + "step": 2093 + }, + { + "epoch": 1.0345978005683925, + "grad_norm": 1.1371538472805924, + "learning_rate": 1.0265513036069803e-05, + "loss": 0.2732700705528259, + "step": 2094 + }, + { + "epoch": 1.0350920548622267, + "grad_norm": 1.1448190493490698, + "learning_rate": 1.0257345236801215e-05, + "loss": 0.25189805030822754, + "step": 2095 + }, + { + "epoch": 1.0355863091560609, + "grad_norm": 1.1221327830153236, + "learning_rate": 1.0249177265732629e-05, + "loss": 0.3177054524421692, + "step": 2096 + }, + { + "epoch": 1.036080563449895, + "grad_norm": 1.0492479192600686, + "learning_rate": 1.0241009128316854e-05, + "loss": 0.23350921273231506, + "step": 2097 + }, + { + "epoch": 1.036574817743729, + 
"grad_norm": 1.2565303796372052, + "learning_rate": 1.0232840830006832e-05, + "loss": 0.3011140525341034, + "step": 2098 + }, + { + "epoch": 1.0370690720375633, + "grad_norm": 1.164329016307231, + "learning_rate": 1.0224672376255598e-05, + "loss": 0.2578561305999756, + "step": 2099 + }, + { + "epoch": 1.0375633263313975, + "grad_norm": 1.1701632763887444, + "learning_rate": 1.0216503772516297e-05, + "loss": 0.2622804045677185, + "step": 2100 + }, + { + "epoch": 1.0380575806252317, + "grad_norm": 1.219987069304434, + "learning_rate": 1.0208335024242169e-05, + "loss": 0.2662869691848755, + "step": 2101 + }, + { + "epoch": 1.0385518349190659, + "grad_norm": 1.2303351498865798, + "learning_rate": 1.0200166136886558e-05, + "loss": 0.27084922790527344, + "step": 2102 + }, + { + "epoch": 1.0390460892129, + "grad_norm": 1.2434849653646893, + "learning_rate": 1.0191997115902891e-05, + "loss": 0.26290780305862427, + "step": 2103 + }, + { + "epoch": 1.0395403435067343, + "grad_norm": 1.192171896111284, + "learning_rate": 1.0183827966744694e-05, + "loss": 0.27367106080055237, + "step": 2104 + }, + { + "epoch": 1.0400345978005685, + "grad_norm": 1.2706879657010888, + "learning_rate": 1.0175658694865574e-05, + "loss": 0.28507113456726074, + "step": 2105 + }, + { + "epoch": 1.0405288520944025, + "grad_norm": 1.2299041683114893, + "learning_rate": 1.0167489305719221e-05, + "loss": 0.2533179521560669, + "step": 2106 + }, + { + "epoch": 1.0410231063882367, + "grad_norm": 1.2546449586851505, + "learning_rate": 1.0159319804759398e-05, + "loss": 0.28755924105644226, + "step": 2107 + }, + { + "epoch": 1.041517360682071, + "grad_norm": 1.1726176332749902, + "learning_rate": 1.015115019743995e-05, + "loss": 0.26722773909568787, + "step": 2108 + }, + { + "epoch": 1.042011614975905, + "grad_norm": 1.3986075029095133, + "learning_rate": 1.0142980489214788e-05, + "loss": 0.3122308850288391, + "step": 2109 + }, + { + "epoch": 1.0425058692697393, + "grad_norm": 1.1273960807987882, + 
"learning_rate": 1.0134810685537899e-05, + "loss": 0.22603261470794678, + "step": 2110 + }, + { + "epoch": 1.0430001235635735, + "grad_norm": 1.1517998097919544, + "learning_rate": 1.0126640791863316e-05, + "loss": 0.2823299169540405, + "step": 2111 + }, + { + "epoch": 1.0434943778574077, + "grad_norm": 1.3191906526904469, + "learning_rate": 1.0118470813645156e-05, + "loss": 0.30999040603637695, + "step": 2112 + }, + { + "epoch": 1.0439886321512417, + "grad_norm": 1.1820148857556874, + "learning_rate": 1.0110300756337569e-05, + "loss": 0.266022264957428, + "step": 2113 + }, + { + "epoch": 1.044482886445076, + "grad_norm": 1.6608098375974347, + "learning_rate": 1.0102130625394776e-05, + "loss": 0.2674095034599304, + "step": 2114 + }, + { + "epoch": 1.0449771407389101, + "grad_norm": 1.2172826939531747, + "learning_rate": 1.0093960426271037e-05, + "loss": 0.30045652389526367, + "step": 2115 + }, + { + "epoch": 1.0454713950327443, + "grad_norm": 1.1782919874699391, + "learning_rate": 1.0085790164420659e-05, + "loss": 0.28455668687820435, + "step": 2116 + }, + { + "epoch": 1.0459656493265785, + "grad_norm": 1.1749948852757104, + "learning_rate": 1.0077619845297992e-05, + "loss": 0.2429066300392151, + "step": 2117 + }, + { + "epoch": 1.0464599036204127, + "grad_norm": 1.1453766958637177, + "learning_rate": 1.0069449474357427e-05, + "loss": 0.2515121102333069, + "step": 2118 + }, + { + "epoch": 1.046954157914247, + "grad_norm": 1.234414346344525, + "learning_rate": 1.0061279057053385e-05, + "loss": 0.30011802911758423, + "step": 2119 + }, + { + "epoch": 1.0474484122080812, + "grad_norm": 1.1997300836338318, + "learning_rate": 1.005310859884032e-05, + "loss": 0.2577645480632782, + "step": 2120 + }, + { + "epoch": 1.0479426665019151, + "grad_norm": 1.0391250618888572, + "learning_rate": 1.0044938105172713e-05, + "loss": 0.21476465463638306, + "step": 2121 + }, + { + "epoch": 1.0484369207957493, + "grad_norm": 1.3902782329860977, + "learning_rate": 1.0036767581505067e-05, + 
"loss": 0.2587023079395294, + "step": 2122 + }, + { + "epoch": 1.0489311750895836, + "grad_norm": 1.1311469001510768, + "learning_rate": 1.0028597033291911e-05, + "loss": 0.2537185251712799, + "step": 2123 + }, + { + "epoch": 1.0494254293834178, + "grad_norm": 1.0410406857423857, + "learning_rate": 1.0020426465987782e-05, + "loss": 0.24486014246940613, + "step": 2124 + }, + { + "epoch": 1.049919683677252, + "grad_norm": 1.4376390907817962, + "learning_rate": 1.0012255885047241e-05, + "loss": 0.2728436589241028, + "step": 2125 + }, + { + "epoch": 1.0504139379710862, + "grad_norm": 1.3186765660198476, + "learning_rate": 1.0004085295924843e-05, + "loss": 0.30238842964172363, + "step": 2126 + }, + { + "epoch": 1.0509081922649204, + "grad_norm": 1.2910923396564535, + "learning_rate": 9.99591470407516e-06, + "loss": 0.30347609519958496, + "step": 2127 + }, + { + "epoch": 1.0514024465587544, + "grad_norm": 1.2188667375190219, + "learning_rate": 9.987744114952764e-06, + "loss": 0.2581411302089691, + "step": 2128 + }, + { + "epoch": 1.0518967008525886, + "grad_norm": 1.2560629408792487, + "learning_rate": 9.979573534012218e-06, + "loss": 0.239881694316864, + "step": 2129 + }, + { + "epoch": 1.0523909551464228, + "grad_norm": 1.2977893982324902, + "learning_rate": 9.971402966708092e-06, + "loss": 0.3058615028858185, + "step": 2130 + }, + { + "epoch": 1.052885209440257, + "grad_norm": 1.2842102843103194, + "learning_rate": 9.963232418494936e-06, + "loss": 0.25285837054252625, + "step": 2131 + }, + { + "epoch": 1.0533794637340912, + "grad_norm": 1.2217652802535364, + "learning_rate": 9.955061894827294e-06, + "loss": 0.27366510033607483, + "step": 2132 + }, + { + "epoch": 1.0538737180279254, + "grad_norm": 1.1489983530266883, + "learning_rate": 9.946891401159683e-06, + "loss": 0.22268086671829224, + "step": 2133 + }, + { + "epoch": 1.0543679723217596, + "grad_norm": 1.1461059074650484, + "learning_rate": 9.938720942946616e-06, + "loss": 0.2540682554244995, + "step": 2134 + }, + 
{ + "epoch": 1.0548622266155938, + "grad_norm": 1.2357731632052622, + "learning_rate": 9.930550525642576e-06, + "loss": 0.262179970741272, + "step": 2135 + }, + { + "epoch": 1.0553564809094278, + "grad_norm": 1.2267299487839205, + "learning_rate": 9.92238015470201e-06, + "loss": 0.25471946597099304, + "step": 2136 + }, + { + "epoch": 1.055850735203262, + "grad_norm": 1.162352058446371, + "learning_rate": 9.914209835579344e-06, + "loss": 0.2580556571483612, + "step": 2137 + }, + { + "epoch": 1.0563449894970962, + "grad_norm": 1.261401071852413, + "learning_rate": 9.906039573728964e-06, + "loss": 0.29909616708755493, + "step": 2138 + }, + { + "epoch": 1.0568392437909304, + "grad_norm": 1.2162562018595562, + "learning_rate": 9.897869374605226e-06, + "loss": 0.2828724980354309, + "step": 2139 + }, + { + "epoch": 1.0573334980847646, + "grad_norm": 1.2076714268656592, + "learning_rate": 9.889699243662433e-06, + "loss": 0.26731711626052856, + "step": 2140 + }, + { + "epoch": 1.0578277523785988, + "grad_norm": 1.2666827338430986, + "learning_rate": 9.88152918635485e-06, + "loss": 0.2912555932998657, + "step": 2141 + }, + { + "epoch": 1.058322006672433, + "grad_norm": 1.1593053736993435, + "learning_rate": 9.873359208136685e-06, + "loss": 0.2335313856601715, + "step": 2142 + }, + { + "epoch": 1.0588162609662672, + "grad_norm": 1.2934128795704303, + "learning_rate": 9.865189314462105e-06, + "loss": 0.2716987729072571, + "step": 2143 + }, + { + "epoch": 1.0593105152601012, + "grad_norm": 1.3251488161911162, + "learning_rate": 9.857019510785215e-06, + "loss": 0.2919968068599701, + "step": 2144 + }, + { + "epoch": 1.0598047695539354, + "grad_norm": 1.197230535187453, + "learning_rate": 9.848849802560057e-06, + "loss": 0.26279503107070923, + "step": 2145 + }, + { + "epoch": 1.0602990238477696, + "grad_norm": 1.263871154668556, + "learning_rate": 9.840680195240606e-06, + "loss": 0.31622597575187683, + "step": 2146 + }, + { + "epoch": 1.0607932781416038, + "grad_norm": 
1.270948260835911, + "learning_rate": 9.832510694280782e-06, + "loss": 0.2399556040763855, + "step": 2147 + }, + { + "epoch": 1.061287532435438, + "grad_norm": 1.2181574543701559, + "learning_rate": 9.824341305134428e-06, + "loss": 0.2650333046913147, + "step": 2148 + }, + { + "epoch": 1.0617817867292723, + "grad_norm": 1.274348887888969, + "learning_rate": 9.816172033255307e-06, + "loss": 0.26629161834716797, + "step": 2149 + }, + { + "epoch": 1.0622760410231065, + "grad_norm": 1.2611051957138737, + "learning_rate": 9.808002884097109e-06, + "loss": 0.28042545914649963, + "step": 2150 + }, + { + "epoch": 1.0627702953169407, + "grad_norm": 1.1495131020915084, + "learning_rate": 9.799833863113445e-06, + "loss": 0.24374082684516907, + "step": 2151 + }, + { + "epoch": 1.0632645496107747, + "grad_norm": 1.1048551979398207, + "learning_rate": 9.791664975757835e-06, + "loss": 0.23013898730278015, + "step": 2152 + }, + { + "epoch": 1.0637588039046089, + "grad_norm": 1.4072884886903234, + "learning_rate": 9.783496227483706e-06, + "loss": 0.25313276052474976, + "step": 2153 + }, + { + "epoch": 1.064253058198443, + "grad_norm": 1.248155174046862, + "learning_rate": 9.775327623744403e-06, + "loss": 0.2642362713813782, + "step": 2154 + }, + { + "epoch": 1.0647473124922773, + "grad_norm": 1.1405325090848468, + "learning_rate": 9.76715916999317e-06, + "loss": 0.2417108118534088, + "step": 2155 + }, + { + "epoch": 1.0652415667861115, + "grad_norm": 1.2556215450887547, + "learning_rate": 9.758990871683148e-06, + "loss": 0.25653502345085144, + "step": 2156 + }, + { + "epoch": 1.0657358210799457, + "grad_norm": 1.22877547041534, + "learning_rate": 9.750822734267378e-06, + "loss": 0.247604638338089, + "step": 2157 + }, + { + "epoch": 1.06623007537378, + "grad_norm": 1.2330600407976389, + "learning_rate": 9.742654763198786e-06, + "loss": 0.2675636112689972, + "step": 2158 + }, + { + "epoch": 1.0667243296676139, + "grad_norm": 1.230290211943024, + "learning_rate": 9.7344869639302e-06, + 
"loss": 0.2570686340332031, + "step": 2159 + }, + { + "epoch": 1.067218583961448, + "grad_norm": 1.4290278531414855, + "learning_rate": 9.726319341914323e-06, + "loss": 0.3046165704727173, + "step": 2160 + }, + { + "epoch": 1.0677128382552823, + "grad_norm": 1.3759048148010737, + "learning_rate": 9.718151902603744e-06, + "loss": 0.24278515577316284, + "step": 2161 + }, + { + "epoch": 1.0682070925491165, + "grad_norm": 1.235098490769484, + "learning_rate": 9.709984651450924e-06, + "loss": 0.2565615773200989, + "step": 2162 + }, + { + "epoch": 1.0687013468429507, + "grad_norm": 1.3303607886608886, + "learning_rate": 9.701817593908209e-06, + "loss": 0.2672972083091736, + "step": 2163 + }, + { + "epoch": 1.069195601136785, + "grad_norm": 1.1620974642583077, + "learning_rate": 9.693650735427808e-06, + "loss": 0.21376445889472961, + "step": 2164 + }, + { + "epoch": 1.0696898554306191, + "grad_norm": 1.2628274098639385, + "learning_rate": 9.685484081461802e-06, + "loss": 0.27743393182754517, + "step": 2165 + }, + { + "epoch": 1.0701841097244533, + "grad_norm": 1.3615817033316626, + "learning_rate": 9.677317637462125e-06, + "loss": 0.2747134566307068, + "step": 2166 + }, + { + "epoch": 1.0706783640182873, + "grad_norm": 1.1533673233774355, + "learning_rate": 9.669151408880581e-06, + "loss": 0.2775312066078186, + "step": 2167 + }, + { + "epoch": 1.0711726183121215, + "grad_norm": 1.392383813550365, + "learning_rate": 9.660985401168833e-06, + "loss": 0.2743167281150818, + "step": 2168 + }, + { + "epoch": 1.0716668726059557, + "grad_norm": 1.1731022030570613, + "learning_rate": 9.652819619778387e-06, + "loss": 0.26030686497688293, + "step": 2169 + }, + { + "epoch": 1.07216112689979, + "grad_norm": 1.2886350622041207, + "learning_rate": 9.644654070160603e-06, + "loss": 0.32307812571525574, + "step": 2170 + }, + { + "epoch": 1.0726553811936241, + "grad_norm": 1.309807945595821, + "learning_rate": 9.63648875776668e-06, + "loss": 0.2773011028766632, + "step": 2171 + }, + { + 
"epoch": 1.0731496354874583, + "grad_norm": 1.3767412291020849, + "learning_rate": 9.628323688047672e-06, + "loss": 0.27996528148651123, + "step": 2172 + }, + { + "epoch": 1.0736438897812925, + "grad_norm": 1.176261909375135, + "learning_rate": 9.620158866454459e-06, + "loss": 0.28022176027297974, + "step": 2173 + }, + { + "epoch": 1.0741381440751268, + "grad_norm": 1.1746327357052728, + "learning_rate": 9.61199429843776e-06, + "loss": 0.2688876986503601, + "step": 2174 + }, + { + "epoch": 1.0746323983689607, + "grad_norm": 1.1454924799354713, + "learning_rate": 9.60382998944812e-06, + "loss": 0.23915211856365204, + "step": 2175 + }, + { + "epoch": 1.075126652662795, + "grad_norm": 1.1770664027196904, + "learning_rate": 9.59566594493592e-06, + "loss": 0.2533806264400482, + "step": 2176 + }, + { + "epoch": 1.0756209069566292, + "grad_norm": 1.2321355277799408, + "learning_rate": 9.587502170351361e-06, + "loss": 0.2887522876262665, + "step": 2177 + }, + { + "epoch": 1.0761151612504634, + "grad_norm": 1.2169372388289537, + "learning_rate": 9.579338671144459e-06, + "loss": 0.2885408401489258, + "step": 2178 + }, + { + "epoch": 1.0766094155442976, + "grad_norm": 1.2209492195717289, + "learning_rate": 9.571175452765045e-06, + "loss": 0.25656914710998535, + "step": 2179 + }, + { + "epoch": 1.0771036698381318, + "grad_norm": 1.2669016448608037, + "learning_rate": 9.563012520662773e-06, + "loss": 0.2935143709182739, + "step": 2180 + }, + { + "epoch": 1.077597924131966, + "grad_norm": 1.2902152081672096, + "learning_rate": 9.554849880287103e-06, + "loss": 0.26728200912475586, + "step": 2181 + }, + { + "epoch": 1.0780921784258002, + "grad_norm": 1.4327778934971358, + "learning_rate": 9.546687537087287e-06, + "loss": 0.2558351159095764, + "step": 2182 + }, + { + "epoch": 1.0785864327196342, + "grad_norm": 1.133861673349663, + "learning_rate": 9.538525496512394e-06, + "loss": 0.2517240047454834, + "step": 2183 + }, + { + "epoch": 1.0790806870134684, + "grad_norm": 
1.1033603168250732, + "learning_rate": 9.53036376401129e-06, + "loss": 0.23258647322654724, + "step": 2184 + }, + { + "epoch": 1.0795749413073026, + "grad_norm": 1.2016172891455823, + "learning_rate": 9.522202345032627e-06, + "loss": 0.24100016057491302, + "step": 2185 + }, + { + "epoch": 1.0800691956011368, + "grad_norm": 1.1844138198826075, + "learning_rate": 9.51404124502485e-06, + "loss": 0.27807697653770447, + "step": 2186 + }, + { + "epoch": 1.080563449894971, + "grad_norm": 1.2045646158236256, + "learning_rate": 9.50588046943619e-06, + "loss": 0.26146867871284485, + "step": 2187 + }, + { + "epoch": 1.0810577041888052, + "grad_norm": 1.3792610621050578, + "learning_rate": 9.497720023714675e-06, + "loss": 0.28570955991744995, + "step": 2188 + }, + { + "epoch": 1.0815519584826394, + "grad_norm": 1.146591161630138, + "learning_rate": 9.489559913308092e-06, + "loss": 0.22583246231079102, + "step": 2189 + }, + { + "epoch": 1.0820462127764734, + "grad_norm": 1.2292468406383597, + "learning_rate": 9.48140014366402e-06, + "loss": 0.27526232600212097, + "step": 2190 + }, + { + "epoch": 1.0825404670703076, + "grad_norm": 1.287410242270342, + "learning_rate": 9.473240720229803e-06, + "loss": 0.2777514159679413, + "step": 2191 + }, + { + "epoch": 1.0830347213641418, + "grad_norm": 1.217692620890676, + "learning_rate": 9.465081648452549e-06, + "loss": 0.25767001509666443, + "step": 2192 + }, + { + "epoch": 1.083528975657976, + "grad_norm": 1.2401214064051047, + "learning_rate": 9.456922933779148e-06, + "loss": 0.24114865064620972, + "step": 2193 + }, + { + "epoch": 1.0840232299518102, + "grad_norm": 1.3343620945353547, + "learning_rate": 9.448764581656237e-06, + "loss": 0.31198200583457947, + "step": 2194 + }, + { + "epoch": 1.0845174842456444, + "grad_norm": 1.2865355942160217, + "learning_rate": 9.440606597530213e-06, + "loss": 0.2724478840827942, + "step": 2195 + }, + { + "epoch": 1.0850117385394786, + "grad_norm": 1.2982367761916904, + "learning_rate": 
9.432448986847229e-06, + "loss": 0.27796900272369385, + "step": 2196 + }, + { + "epoch": 1.0855059928333128, + "grad_norm": 1.293883522594156, + "learning_rate": 9.424291755053198e-06, + "loss": 0.2877587676048279, + "step": 2197 + }, + { + "epoch": 1.0860002471271468, + "grad_norm": 1.354561961211439, + "learning_rate": 9.416134907593764e-06, + "loss": 0.2898337244987488, + "step": 2198 + }, + { + "epoch": 1.086494501420981, + "grad_norm": 1.2931825621227928, + "learning_rate": 9.407978449914322e-06, + "loss": 0.2544672191143036, + "step": 2199 + }, + { + "epoch": 1.0869887557148152, + "grad_norm": 1.2905943399481439, + "learning_rate": 9.399822387460005e-06, + "loss": 0.28336071968078613, + "step": 2200 + }, + { + "epoch": 1.0874830100086494, + "grad_norm": 1.2871287196611743, + "learning_rate": 9.391666725675691e-06, + "loss": 0.2862734794616699, + "step": 2201 + }, + { + "epoch": 1.0879772643024836, + "grad_norm": 1.386969000020192, + "learning_rate": 9.383511470005978e-06, + "loss": 0.26331260800361633, + "step": 2202 + }, + { + "epoch": 1.0884715185963179, + "grad_norm": 1.2750467510922643, + "learning_rate": 9.375356625895201e-06, + "loss": 0.30087417364120483, + "step": 2203 + }, + { + "epoch": 1.088965772890152, + "grad_norm": 1.3434362766675538, + "learning_rate": 9.36720219878741e-06, + "loss": 0.2736594080924988, + "step": 2204 + }, + { + "epoch": 1.089460027183986, + "grad_norm": 1.4852243291487657, + "learning_rate": 9.359048194126395e-06, + "loss": 0.2704418897628784, + "step": 2205 + }, + { + "epoch": 1.0899542814778203, + "grad_norm": 1.2230094225693318, + "learning_rate": 9.350894617355645e-06, + "loss": 0.24540236592292786, + "step": 2206 + }, + { + "epoch": 1.0904485357716545, + "grad_norm": 1.2299505503288506, + "learning_rate": 9.342741473918375e-06, + "loss": 0.26376527547836304, + "step": 2207 + }, + { + "epoch": 1.0909427900654887, + "grad_norm": 1.0803859595224048, + "learning_rate": 9.334588769257502e-06, + "loss": 0.24062004685401917, + 
"step": 2208 + }, + { + "epoch": 1.0914370443593229, + "grad_norm": 1.1443970874822365, + "learning_rate": 9.326436508815662e-06, + "loss": 0.24209418892860413, + "step": 2209 + }, + { + "epoch": 1.091931298653157, + "grad_norm": 1.3414968412819865, + "learning_rate": 9.318284698035188e-06, + "loss": 0.2732285261154175, + "step": 2210 + }, + { + "epoch": 1.0924255529469913, + "grad_norm": 1.2470429271312866, + "learning_rate": 9.310133342358106e-06, + "loss": 0.2684158980846405, + "step": 2211 + }, + { + "epoch": 1.0929198072408255, + "grad_norm": 1.1035267199988392, + "learning_rate": 9.301982447226145e-06, + "loss": 0.22511601448059082, + "step": 2212 + }, + { + "epoch": 1.0934140615346597, + "grad_norm": 1.165505029883992, + "learning_rate": 9.293832018080731e-06, + "loss": 0.2622867226600647, + "step": 2213 + }, + { + "epoch": 1.0939083158284937, + "grad_norm": 1.2923685951682604, + "learning_rate": 9.285682060362974e-06, + "loss": 0.3030891418457031, + "step": 2214 + }, + { + "epoch": 1.094402570122328, + "grad_norm": 1.2523210407583818, + "learning_rate": 9.277532579513666e-06, + "loss": 0.24928592145442963, + "step": 2215 + }, + { + "epoch": 1.094896824416162, + "grad_norm": 1.2048717570746186, + "learning_rate": 9.269383580973285e-06, + "loss": 0.2588339149951935, + "step": 2216 + }, + { + "epoch": 1.0953910787099963, + "grad_norm": 1.2427748942142012, + "learning_rate": 9.261235070181983e-06, + "loss": 0.2587873339653015, + "step": 2217 + }, + { + "epoch": 1.0958853330038305, + "grad_norm": 1.3192410250632676, + "learning_rate": 9.253087052579596e-06, + "loss": 0.29420971870422363, + "step": 2218 + }, + { + "epoch": 1.0963795872976647, + "grad_norm": 1.1714489078180652, + "learning_rate": 9.244939533605619e-06, + "loss": 0.25384342670440674, + "step": 2219 + }, + { + "epoch": 1.096873841591499, + "grad_norm": 1.2208998726962157, + "learning_rate": 9.236792518699224e-06, + "loss": 0.23133251070976257, + "step": 2220 + }, + { + "epoch": 1.097368095885333, + 
"grad_norm": 1.1919788928879418, + "learning_rate": 9.228646013299233e-06, + "loss": 0.26196008920669556, + "step": 2221 + }, + { + "epoch": 1.0978623501791671, + "grad_norm": 1.345065700534229, + "learning_rate": 9.220500022844144e-06, + "loss": 0.2567690908908844, + "step": 2222 + }, + { + "epoch": 1.0983566044730013, + "grad_norm": 1.1808254692787845, + "learning_rate": 9.212354552772107e-06, + "loss": 0.2555367350578308, + "step": 2223 + }, + { + "epoch": 1.0988508587668355, + "grad_norm": 1.1544608952675586, + "learning_rate": 9.204209608520913e-06, + "loss": 0.24357245862483978, + "step": 2224 + }, + { + "epoch": 1.0993451130606697, + "grad_norm": 1.3367524689374175, + "learning_rate": 9.19606519552801e-06, + "loss": 0.2792712450027466, + "step": 2225 + }, + { + "epoch": 1.099839367354504, + "grad_norm": 1.3277136329189279, + "learning_rate": 9.1879213192305e-06, + "loss": 0.29090794920921326, + "step": 2226 + }, + { + "epoch": 1.1003336216483381, + "grad_norm": 1.304360721279056, + "learning_rate": 9.179777985065115e-06, + "loss": 0.2777528762817383, + "step": 2227 + }, + { + "epoch": 1.1008278759421724, + "grad_norm": 1.1781995191131436, + "learning_rate": 9.171635198468227e-06, + "loss": 0.263868123292923, + "step": 2228 + }, + { + "epoch": 1.1013221302360063, + "grad_norm": 1.184942105326879, + "learning_rate": 9.16349296487584e-06, + "loss": 0.24118748307228088, + "step": 2229 + }, + { + "epoch": 1.1018163845298405, + "grad_norm": 1.2411255946822906, + "learning_rate": 9.155351289723603e-06, + "loss": 0.2176896631717682, + "step": 2230 + }, + { + "epoch": 1.1023106388236747, + "grad_norm": 1.3759218504425914, + "learning_rate": 9.147210178446776e-06, + "loss": 0.24727840721607208, + "step": 2231 + }, + { + "epoch": 1.102804893117509, + "grad_norm": 1.287783002848043, + "learning_rate": 9.139069636480247e-06, + "loss": 0.2711295783519745, + "step": 2232 + }, + { + "epoch": 1.1032991474113432, + "grad_norm": 1.2808604096079383, + "learning_rate": 
9.130929669258525e-06, + "loss": 0.2987736165523529, + "step": 2233 + }, + { + "epoch": 1.1037934017051774, + "grad_norm": 1.3771259989337001, + "learning_rate": 9.122790282215743e-06, + "loss": 0.2773835062980652, + "step": 2234 + }, + { + "epoch": 1.1042876559990116, + "grad_norm": 1.2299830744412572, + "learning_rate": 9.114651480785632e-06, + "loss": 0.29417523741722107, + "step": 2235 + }, + { + "epoch": 1.1047819102928456, + "grad_norm": 1.377692958442212, + "learning_rate": 9.106513270401545e-06, + "loss": 0.2642611265182495, + "step": 2236 + }, + { + "epoch": 1.1052761645866798, + "grad_norm": 1.2764125735134089, + "learning_rate": 9.098375656496434e-06, + "loss": 0.2789427638053894, + "step": 2237 + }, + { + "epoch": 1.105770418880514, + "grad_norm": 1.3238778744589295, + "learning_rate": 9.090238644502845e-06, + "loss": 0.3002237379550934, + "step": 2238 + }, + { + "epoch": 1.1062646731743482, + "grad_norm": 1.1862434874371655, + "learning_rate": 9.082102239852942e-06, + "loss": 0.27620676159858704, + "step": 2239 + }, + { + "epoch": 1.1067589274681824, + "grad_norm": 1.327009037228036, + "learning_rate": 9.07396644797847e-06, + "loss": 0.26718735694885254, + "step": 2240 + }, + { + "epoch": 1.1072531817620166, + "grad_norm": 1.3581828145326202, + "learning_rate": 9.065831274310763e-06, + "loss": 0.27443817257881165, + "step": 2241 + }, + { + "epoch": 1.1077474360558508, + "grad_norm": 1.2348189100714968, + "learning_rate": 9.057696724280748e-06, + "loss": 0.2536284923553467, + "step": 2242 + }, + { + "epoch": 1.108241690349685, + "grad_norm": 1.274876240899672, + "learning_rate": 9.049562803318942e-06, + "loss": 0.2583077549934387, + "step": 2243 + }, + { + "epoch": 1.108735944643519, + "grad_norm": 1.2591915779147578, + "learning_rate": 9.041429516855427e-06, + "loss": 0.2696278393268585, + "step": 2244 + }, + { + "epoch": 1.1092301989373532, + "grad_norm": 1.4248240108913692, + "learning_rate": 9.033296870319868e-06, + "loss": 0.2966364622116089, + 
"step": 2245 + }, + { + "epoch": 1.1097244532311874, + "grad_norm": 1.1050822330716321, + "learning_rate": 9.025164869141503e-06, + "loss": 0.22690679132938385, + "step": 2246 + }, + { + "epoch": 1.1102187075250216, + "grad_norm": 1.192560579016723, + "learning_rate": 9.017033518749147e-06, + "loss": 0.2777915894985199, + "step": 2247 + }, + { + "epoch": 1.1107129618188558, + "grad_norm": 1.3394858504136318, + "learning_rate": 9.008902824571168e-06, + "loss": 0.2890303134918213, + "step": 2248 + }, + { + "epoch": 1.11120721611269, + "grad_norm": 1.0426463189164805, + "learning_rate": 9.000772792035505e-06, + "loss": 0.22669392824172974, + "step": 2249 + }, + { + "epoch": 1.1117014704065242, + "grad_norm": 1.1970809485558533, + "learning_rate": 8.992643426569643e-06, + "loss": 0.26416563987731934, + "step": 2250 + }, + { + "epoch": 1.1121957247003584, + "grad_norm": 1.1888202892832207, + "learning_rate": 8.984514733600641e-06, + "loss": 0.2745298147201538, + "step": 2251 + }, + { + "epoch": 1.1126899789941924, + "grad_norm": 1.3798693264357922, + "learning_rate": 8.97638671855509e-06, + "loss": 0.31175684928894043, + "step": 2252 + }, + { + "epoch": 1.1131842332880266, + "grad_norm": 1.1626887122886307, + "learning_rate": 8.968259386859146e-06, + "loss": 0.2632657289505005, + "step": 2253 + }, + { + "epoch": 1.1136784875818608, + "grad_norm": 1.810662888324155, + "learning_rate": 8.960132743938485e-06, + "loss": 0.25820252299308777, + "step": 2254 + }, + { + "epoch": 1.114172741875695, + "grad_norm": 1.061521514088085, + "learning_rate": 8.95200679521835e-06, + "loss": 0.24255456030368805, + "step": 2255 + }, + { + "epoch": 1.1146669961695292, + "grad_norm": 1.2696759740581753, + "learning_rate": 8.943881546123506e-06, + "loss": 0.2973442077636719, + "step": 2256 + }, + { + "epoch": 1.1151612504633635, + "grad_norm": 1.1336353694819978, + "learning_rate": 8.935757002078252e-06, + "loss": 0.23320606350898743, + "step": 2257 + }, + { + "epoch": 1.1156555047571977, + 
"grad_norm": 1.275444057796017, + "learning_rate": 8.927633168506415e-06, + "loss": 0.2923268675804138, + "step": 2258 + }, + { + "epoch": 1.1161497590510319, + "grad_norm": 1.25496425665649, + "learning_rate": 8.91951005083135e-06, + "loss": 0.25932425260543823, + "step": 2259 + }, + { + "epoch": 1.1166440133448658, + "grad_norm": 1.2215943645090854, + "learning_rate": 8.911387654475943e-06, + "loss": 0.2631821036338806, + "step": 2260 + }, + { + "epoch": 1.1171382676387, + "grad_norm": 1.226020936236602, + "learning_rate": 8.903265984862581e-06, + "loss": 0.24741420149803162, + "step": 2261 + }, + { + "epoch": 1.1176325219325343, + "grad_norm": 1.165036984102613, + "learning_rate": 8.895145047413178e-06, + "loss": 0.2593516707420349, + "step": 2262 + }, + { + "epoch": 1.1181267762263685, + "grad_norm": 1.2132388690590856, + "learning_rate": 8.88702484754915e-06, + "loss": 0.22109609842300415, + "step": 2263 + }, + { + "epoch": 1.1186210305202027, + "grad_norm": 1.242512673005374, + "learning_rate": 8.878905390691437e-06, + "loss": 0.24363039433956146, + "step": 2264 + }, + { + "epoch": 1.1191152848140369, + "grad_norm": 1.210365574835302, + "learning_rate": 8.870786682260465e-06, + "loss": 0.2507505714893341, + "step": 2265 + }, + { + "epoch": 1.119609539107871, + "grad_norm": 1.3229609964254254, + "learning_rate": 8.86266872767617e-06, + "loss": 0.303046315908432, + "step": 2266 + }, + { + "epoch": 1.120103793401705, + "grad_norm": 1.282548473383847, + "learning_rate": 8.854551532357977e-06, + "loss": 0.257943332195282, + "step": 2267 + }, + { + "epoch": 1.1205980476955393, + "grad_norm": 1.2641740973335522, + "learning_rate": 8.84643510172482e-06, + "loss": 0.2697421610355377, + "step": 2268 + }, + { + "epoch": 1.1210923019893735, + "grad_norm": 1.126371134669409, + "learning_rate": 8.838319441195105e-06, + "loss": 0.20090234279632568, + "step": 2269 + }, + { + "epoch": 1.1215865562832077, + "grad_norm": 1.3584193930662543, + "learning_rate": 
8.830204556186736e-06, + "loss": 0.2714189887046814, + "step": 2270 + }, + { + "epoch": 1.122080810577042, + "grad_norm": 1.1168786328747864, + "learning_rate": 8.822090452117084e-06, + "loss": 0.23497477173805237, + "step": 2271 + }, + { + "epoch": 1.122575064870876, + "grad_norm": 1.3047944688196833, + "learning_rate": 8.81397713440302e-06, + "loss": 0.2582445740699768, + "step": 2272 + }, + { + "epoch": 1.1230693191647103, + "grad_norm": 1.2807794267280126, + "learning_rate": 8.805864608460876e-06, + "loss": 0.26494619250297546, + "step": 2273 + }, + { + "epoch": 1.1235635734585445, + "grad_norm": 1.3251515621500554, + "learning_rate": 8.797752879706455e-06, + "loss": 0.2767868936061859, + "step": 2274 + }, + { + "epoch": 1.1240578277523785, + "grad_norm": 1.5161646380346314, + "learning_rate": 8.789641953555032e-06, + "loss": 0.27696311473846436, + "step": 2275 + }, + { + "epoch": 1.1245520820462127, + "grad_norm": 1.3659389136687503, + "learning_rate": 8.78153183542135e-06, + "loss": 0.27048689126968384, + "step": 2276 + }, + { + "epoch": 1.125046336340047, + "grad_norm": 1.3893625373049876, + "learning_rate": 8.773422530719606e-06, + "loss": 0.2940211892127991, + "step": 2277 + }, + { + "epoch": 1.1255405906338811, + "grad_norm": 1.310212206650707, + "learning_rate": 8.765314044863453e-06, + "loss": 0.24859851598739624, + "step": 2278 + }, + { + "epoch": 1.1260348449277153, + "grad_norm": 1.3087530353150083, + "learning_rate": 8.757206383265998e-06, + "loss": 0.28879350423812866, + "step": 2279 + }, + { + "epoch": 1.1265290992215495, + "grad_norm": 1.2514534154786532, + "learning_rate": 8.74909955133981e-06, + "loss": 0.24804209172725677, + "step": 2280 + }, + { + "epoch": 1.1270233535153837, + "grad_norm": 1.3358056447173947, + "learning_rate": 8.740993554496886e-06, + "loss": 0.3199496567249298, + "step": 2281 + }, + { + "epoch": 1.1275176078092177, + "grad_norm": 2.15705729620974, + "learning_rate": 8.732888398148678e-06, + "loss": 0.3098929524421692, + 
"step": 2282 + }, + { + "epoch": 1.128011862103052, + "grad_norm": 1.2048730778866592, + "learning_rate": 8.724784087706067e-06, + "loss": 0.21280749142169952, + "step": 2283 + }, + { + "epoch": 1.1285061163968861, + "grad_norm": 1.1819530781050969, + "learning_rate": 8.716680628579382e-06, + "loss": 0.25330856442451477, + "step": 2284 + }, + { + "epoch": 1.1290003706907203, + "grad_norm": 1.2218083349938962, + "learning_rate": 8.708578026178371e-06, + "loss": 0.26141977310180664, + "step": 2285 + }, + { + "epoch": 1.1294946249845546, + "grad_norm": 1.3085311775335164, + "learning_rate": 8.700476285912219e-06, + "loss": 0.2529010772705078, + "step": 2286 + }, + { + "epoch": 1.1299888792783888, + "grad_norm": 1.4496496993285695, + "learning_rate": 8.69237541318953e-06, + "loss": 0.2662504315376282, + "step": 2287 + }, + { + "epoch": 1.130483133572223, + "grad_norm": 1.2797233255982605, + "learning_rate": 8.684275413418329e-06, + "loss": 0.2724575996398926, + "step": 2288 + }, + { + "epoch": 1.1309773878660572, + "grad_norm": 1.2524016016810007, + "learning_rate": 8.676176292006065e-06, + "loss": 0.2820962965488434, + "step": 2289 + }, + { + "epoch": 1.1314716421598914, + "grad_norm": 1.2157522787611978, + "learning_rate": 8.668078054359595e-06, + "loss": 0.2594743072986603, + "step": 2290 + }, + { + "epoch": 1.1319658964537254, + "grad_norm": 1.1017631552140204, + "learning_rate": 8.659980705885183e-06, + "loss": 0.25397709012031555, + "step": 2291 + }, + { + "epoch": 1.1324601507475596, + "grad_norm": 1.3505914192645034, + "learning_rate": 8.651884251988503e-06, + "loss": 0.27261337637901306, + "step": 2292 + }, + { + "epoch": 1.1329544050413938, + "grad_norm": 1.191460472235454, + "learning_rate": 8.643788698074638e-06, + "loss": 0.2726992070674896, + "step": 2293 + }, + { + "epoch": 1.133448659335228, + "grad_norm": 1.2175895117879216, + "learning_rate": 8.635694049548058e-06, + "loss": 0.2792774438858032, + "step": 2294 + }, + { + "epoch": 1.1339429136290622, + 
"grad_norm": 1.272860546351146, + "learning_rate": 8.627600311812638e-06, + "loss": 0.310885488986969, + "step": 2295 + }, + { + "epoch": 1.1344371679228964, + "grad_norm": 1.2747295027163217, + "learning_rate": 8.619507490271638e-06, + "loss": 0.27060413360595703, + "step": 2296 + }, + { + "epoch": 1.1349314222167306, + "grad_norm": 1.2507140444567972, + "learning_rate": 8.611415590327718e-06, + "loss": 0.27069440484046936, + "step": 2297 + }, + { + "epoch": 1.1354256765105646, + "grad_norm": 1.2299186955801236, + "learning_rate": 8.603324617382905e-06, + "loss": 0.2790459990501404, + "step": 2298 + }, + { + "epoch": 1.1359199308043988, + "grad_norm": 1.2813816772493964, + "learning_rate": 8.595234576838624e-06, + "loss": 0.27170947194099426, + "step": 2299 + }, + { + "epoch": 1.136414185098233, + "grad_norm": 1.1903279302585759, + "learning_rate": 8.587145474095665e-06, + "loss": 0.25313863158226013, + "step": 2300 + }, + { + "epoch": 1.1369084393920672, + "grad_norm": 1.2968469055543796, + "learning_rate": 8.5790573145542e-06, + "loss": 0.289467990398407, + "step": 2301 + }, + { + "epoch": 1.1374026936859014, + "grad_norm": 1.3141096348522086, + "learning_rate": 8.570970103613774e-06, + "loss": 0.29796460270881653, + "step": 2302 + }, + { + "epoch": 1.1378969479797356, + "grad_norm": 1.2855551342619271, + "learning_rate": 8.562883846673286e-06, + "loss": 0.27264270186424255, + "step": 2303 + }, + { + "epoch": 1.1383912022735698, + "grad_norm": 1.2243974310235655, + "learning_rate": 8.554798549131005e-06, + "loss": 0.3099757134914398, + "step": 2304 + }, + { + "epoch": 1.138885456567404, + "grad_norm": 1.2936181628424743, + "learning_rate": 8.546714216384565e-06, + "loss": 0.30002498626708984, + "step": 2305 + }, + { + "epoch": 1.139379710861238, + "grad_norm": 1.7617864884936485, + "learning_rate": 8.538630853830951e-06, + "loss": 0.2428818643093109, + "step": 2306 + }, + { + "epoch": 1.1398739651550722, + "grad_norm": 1.24686983002664, + "learning_rate": 
8.530548466866497e-06, + "loss": 0.2601294219493866, + "step": 2307 + }, + { + "epoch": 1.1403682194489064, + "grad_norm": 1.2066765531591284, + "learning_rate": 8.522467060886888e-06, + "loss": 0.23878628015518188, + "step": 2308 + }, + { + "epoch": 1.1408624737427406, + "grad_norm": 1.345733709932402, + "learning_rate": 8.514386641287163e-06, + "loss": 0.2780643403530121, + "step": 2309 + }, + { + "epoch": 1.1413567280365748, + "grad_norm": 1.2756115099724787, + "learning_rate": 8.506307213461689e-06, + "loss": 0.29834824800491333, + "step": 2310 + }, + { + "epoch": 1.141850982330409, + "grad_norm": 1.3376095615389103, + "learning_rate": 8.498228782804175e-06, + "loss": 0.2733996510505676, + "step": 2311 + }, + { + "epoch": 1.1423452366242433, + "grad_norm": 1.3063802509871558, + "learning_rate": 8.490151354707669e-06, + "loss": 0.2524843215942383, + "step": 2312 + }, + { + "epoch": 1.1428394909180772, + "grad_norm": 1.2776723106689647, + "learning_rate": 8.482074934564543e-06, + "loss": 0.29077857732772827, + "step": 2313 + }, + { + "epoch": 1.1433337452119114, + "grad_norm": 1.2114776729729342, + "learning_rate": 8.473999527766503e-06, + "loss": 0.25935155153274536, + "step": 2314 + }, + { + "epoch": 1.1438279995057457, + "grad_norm": 1.3166365920869918, + "learning_rate": 8.465925139704578e-06, + "loss": 0.23595012724399567, + "step": 2315 + }, + { + "epoch": 1.1443222537995799, + "grad_norm": 1.2268504419293456, + "learning_rate": 8.457851775769108e-06, + "loss": 0.25193360447883606, + "step": 2316 + }, + { + "epoch": 1.144816508093414, + "grad_norm": 1.2847886622034916, + "learning_rate": 8.449779441349755e-06, + "loss": 0.26844412088394165, + "step": 2317 + }, + { + "epoch": 1.1453107623872483, + "grad_norm": 1.2550831674884213, + "learning_rate": 8.441708141835499e-06, + "loss": 0.2507320046424866, + "step": 2318 + }, + { + "epoch": 1.1458050166810825, + "grad_norm": 1.31186920690482, + "learning_rate": 8.433637882614624e-06, + "loss": 0.2756047248840332, 
+ "step": 2319 + }, + { + "epoch": 1.1462992709749167, + "grad_norm": 1.3818376930568548, + "learning_rate": 8.425568669074717e-06, + "loss": 0.3136482536792755, + "step": 2320 + }, + { + "epoch": 1.146793525268751, + "grad_norm": 1.3094285230006764, + "learning_rate": 8.417500506602668e-06, + "loss": 0.25975438952445984, + "step": 2321 + }, + { + "epoch": 1.1472877795625849, + "grad_norm": 1.3148310008881885, + "learning_rate": 8.409433400584674e-06, + "loss": 0.2524915039539337, + "step": 2322 + }, + { + "epoch": 1.147782033856419, + "grad_norm": 1.316055955366049, + "learning_rate": 8.401367356406214e-06, + "loss": 0.2731180787086487, + "step": 2323 + }, + { + "epoch": 1.1482762881502533, + "grad_norm": 1.4277670811350172, + "learning_rate": 8.393302379452065e-06, + "loss": 0.27752095460891724, + "step": 2324 + }, + { + "epoch": 1.1487705424440875, + "grad_norm": 1.2586766809004215, + "learning_rate": 8.385238475106287e-06, + "loss": 0.269240140914917, + "step": 2325 + }, + { + "epoch": 1.1492647967379217, + "grad_norm": 1.301058586916402, + "learning_rate": 8.377175648752236e-06, + "loss": 0.2668418288230896, + "step": 2326 + }, + { + "epoch": 1.149759051031756, + "grad_norm": 1.2869179599070777, + "learning_rate": 8.369113905772532e-06, + "loss": 0.29276758432388306, + "step": 2327 + }, + { + "epoch": 1.15025330532559, + "grad_norm": 1.240170388592341, + "learning_rate": 8.361053251549083e-06, + "loss": 0.26562872529029846, + "step": 2328 + }, + { + "epoch": 1.150747559619424, + "grad_norm": 1.2907483203574122, + "learning_rate": 8.352993691463063e-06, + "loss": 0.257779061794281, + "step": 2329 + }, + { + "epoch": 1.1512418139132583, + "grad_norm": 1.3761256870332743, + "learning_rate": 8.344935230894926e-06, + "loss": 0.2871868312358856, + "step": 2330 + }, + { + "epoch": 1.1517360682070925, + "grad_norm": 1.2766304490065612, + "learning_rate": 8.336877875224379e-06, + "loss": 0.25191348791122437, + "step": 2331 + }, + { + "epoch": 1.1522303225009267, + 
"grad_norm": 1.1532415542893881, + "learning_rate": 8.3288216298304e-06, + "loss": 0.27057239413261414, + "step": 2332 + }, + { + "epoch": 1.152724576794761, + "grad_norm": 1.3903855220327628, + "learning_rate": 8.32076650009122e-06, + "loss": 0.31574326753616333, + "step": 2333 + }, + { + "epoch": 1.1532188310885951, + "grad_norm": 1.5549371484345924, + "learning_rate": 8.312712491384332e-06, + "loss": 0.22503693401813507, + "step": 2334 + }, + { + "epoch": 1.1537130853824293, + "grad_norm": 1.2363735263099107, + "learning_rate": 8.304659609086478e-06, + "loss": 0.25754863023757935, + "step": 2335 + }, + { + "epoch": 1.1542073396762635, + "grad_norm": 1.1790773293013888, + "learning_rate": 8.296607858573646e-06, + "loss": 0.24367934465408325, + "step": 2336 + }, + { + "epoch": 1.1547015939700975, + "grad_norm": 1.2792693896599328, + "learning_rate": 8.288557245221068e-06, + "loss": 0.28907084465026855, + "step": 2337 + }, + { + "epoch": 1.1551958482639317, + "grad_norm": 1.1852966795691644, + "learning_rate": 8.280507774403217e-06, + "loss": 0.24526283144950867, + "step": 2338 + }, + { + "epoch": 1.155690102557766, + "grad_norm": 1.1473094958169556, + "learning_rate": 8.272459451493811e-06, + "loss": 0.21968787908554077, + "step": 2339 + }, + { + "epoch": 1.1561843568516001, + "grad_norm": 1.1613080622383485, + "learning_rate": 8.264412281865791e-06, + "loss": 0.23803061246871948, + "step": 2340 + }, + { + "epoch": 1.1566786111454344, + "grad_norm": 1.2818425210270699, + "learning_rate": 8.256366270891335e-06, + "loss": 0.25715917348861694, + "step": 2341 + }, + { + "epoch": 1.1571728654392686, + "grad_norm": 1.300939575113673, + "learning_rate": 8.248321423941836e-06, + "loss": 0.29443520307540894, + "step": 2342 + }, + { + "epoch": 1.1576671197331028, + "grad_norm": 1.2224332053171705, + "learning_rate": 8.240277746387934e-06, + "loss": 0.24904949963092804, + "step": 2343 + }, + { + "epoch": 1.1581613740269368, + "grad_norm": 1.2866663921835886, + 
"learning_rate": 8.23223524359946e-06, + "loss": 0.2594628632068634, + "step": 2344 + }, + { + "epoch": 1.158655628320771, + "grad_norm": 1.2731058113968243, + "learning_rate": 8.224193920945482e-06, + "loss": 0.23853302001953125, + "step": 2345 + }, + { + "epoch": 1.1591498826146052, + "grad_norm": 1.3394742959570003, + "learning_rate": 8.216153783794266e-06, + "loss": 0.25465112924575806, + "step": 2346 + }, + { + "epoch": 1.1596441369084394, + "grad_norm": 1.3135301213887383, + "learning_rate": 8.208114837513297e-06, + "loss": 0.28038230538368225, + "step": 2347 + }, + { + "epoch": 1.1601383912022736, + "grad_norm": 1.350685866794537, + "learning_rate": 8.200077087469262e-06, + "loss": 0.3144591450691223, + "step": 2348 + }, + { + "epoch": 1.1606326454961078, + "grad_norm": 1.281224607522297, + "learning_rate": 8.192040539028047e-06, + "loss": 0.25782787799835205, + "step": 2349 + }, + { + "epoch": 1.161126899789942, + "grad_norm": 1.5124699254380607, + "learning_rate": 8.18400519755473e-06, + "loss": 0.21928566694259644, + "step": 2350 + }, + { + "epoch": 1.1616211540837762, + "grad_norm": 1.2617101773123074, + "learning_rate": 8.175971068413598e-06, + "loss": 0.2277221381664276, + "step": 2351 + }, + { + "epoch": 1.1621154083776104, + "grad_norm": 1.3465952359588251, + "learning_rate": 8.16793815696812e-06, + "loss": 0.26971378922462463, + "step": 2352 + }, + { + "epoch": 1.1626096626714444, + "grad_norm": 1.352802202139023, + "learning_rate": 8.15990646858095e-06, + "loss": 0.26448535919189453, + "step": 2353 + }, + { + "epoch": 1.1631039169652786, + "grad_norm": 1.3091049684475664, + "learning_rate": 8.151876008613927e-06, + "loss": 0.26372095942497253, + "step": 2354 + }, + { + "epoch": 1.1635981712591128, + "grad_norm": 1.3450938198850664, + "learning_rate": 8.143846782428078e-06, + "loss": 0.2594243288040161, + "step": 2355 + }, + { + "epoch": 1.164092425552947, + "grad_norm": 1.2377171543356333, + "learning_rate": 8.135818795383597e-06, + "loss": 
0.23994986712932587, + "step": 2356 + }, + { + "epoch": 1.1645866798467812, + "grad_norm": 1.2983017697862052, + "learning_rate": 8.12779205283985e-06, + "loss": 0.2746032476425171, + "step": 2357 + }, + { + "epoch": 1.1650809341406154, + "grad_norm": 1.3938993958898265, + "learning_rate": 8.119766560155377e-06, + "loss": 0.3323846161365509, + "step": 2358 + }, + { + "epoch": 1.1655751884344494, + "grad_norm": 1.3890076094482564, + "learning_rate": 8.111742322687886e-06, + "loss": 0.28155508637428284, + "step": 2359 + }, + { + "epoch": 1.1660694427282836, + "grad_norm": 1.361844276882708, + "learning_rate": 8.103719345794237e-06, + "loss": 0.2936748266220093, + "step": 2360 + }, + { + "epoch": 1.1665636970221178, + "grad_norm": 1.2168650482731003, + "learning_rate": 8.095697634830463e-06, + "loss": 0.23575282096862793, + "step": 2361 + }, + { + "epoch": 1.167057951315952, + "grad_norm": 1.277845029620416, + "learning_rate": 8.087677195151737e-06, + "loss": 0.24547496438026428, + "step": 2362 + }, + { + "epoch": 1.1675522056097862, + "grad_norm": 1.3371291006512767, + "learning_rate": 8.079658032112388e-06, + "loss": 0.2936372458934784, + "step": 2363 + }, + { + "epoch": 1.1680464599036204, + "grad_norm": 1.316297337509115, + "learning_rate": 8.071640151065902e-06, + "loss": 0.28602418303489685, + "step": 2364 + }, + { + "epoch": 1.1685407141974546, + "grad_norm": 1.271542457187923, + "learning_rate": 8.0636235573649e-06, + "loss": 0.2742761969566345, + "step": 2365 + }, + { + "epoch": 1.1690349684912889, + "grad_norm": 1.2379702024007857, + "learning_rate": 8.05560825636114e-06, + "loss": 0.2590268552303314, + "step": 2366 + }, + { + "epoch": 1.169529222785123, + "grad_norm": 1.2195835846594238, + "learning_rate": 8.047594253405525e-06, + "loss": 0.26881399750709534, + "step": 2367 + }, + { + "epoch": 1.170023477078957, + "grad_norm": 1.279205613064969, + "learning_rate": 8.039581553848093e-06, + "loss": 0.27069953083992004, + "step": 2368 + }, + { + "epoch": 
1.1705177313727912, + "grad_norm": 1.1650094541250327, + "learning_rate": 8.031570163038005e-06, + "loss": 0.27320611476898193, + "step": 2369 + }, + { + "epoch": 1.1710119856666255, + "grad_norm": 1.289507742767465, + "learning_rate": 8.023560086323548e-06, + "loss": 0.26400327682495117, + "step": 2370 + }, + { + "epoch": 1.1715062399604597, + "grad_norm": 1.1403608861276666, + "learning_rate": 8.015551329052136e-06, + "loss": 0.22287744283676147, + "step": 2371 + }, + { + "epoch": 1.1720004942542939, + "grad_norm": 1.2409841787965832, + "learning_rate": 8.007543896570309e-06, + "loss": 0.28240424394607544, + "step": 2372 + }, + { + "epoch": 1.172494748548128, + "grad_norm": 1.3414402473623117, + "learning_rate": 7.999537794223702e-06, + "loss": 0.27119147777557373, + "step": 2373 + }, + { + "epoch": 1.1729890028419623, + "grad_norm": 1.376418134177551, + "learning_rate": 7.991533027357085e-06, + "loss": 0.2579900920391083, + "step": 2374 + }, + { + "epoch": 1.1734832571357963, + "grad_norm": 1.197547817498857, + "learning_rate": 7.983529601314317e-06, + "loss": 0.25550374388694763, + "step": 2375 + }, + { + "epoch": 1.1739775114296305, + "grad_norm": 1.119102387270249, + "learning_rate": 7.97552752143838e-06, + "loss": 0.21197429299354553, + "step": 2376 + }, + { + "epoch": 1.1744717657234647, + "grad_norm": 1.245296460371477, + "learning_rate": 7.96752679307134e-06, + "loss": 0.28724029660224915, + "step": 2377 + }, + { + "epoch": 1.1749660200172989, + "grad_norm": 1.119081251981291, + "learning_rate": 7.959527421554375e-06, + "loss": 0.24320468306541443, + "step": 2378 + }, + { + "epoch": 1.175460274311133, + "grad_norm": 1.1094352642608503, + "learning_rate": 7.951529412227745e-06, + "loss": 0.22487501800060272, + "step": 2379 + }, + { + "epoch": 1.1759545286049673, + "grad_norm": 1.1424975538486684, + "learning_rate": 7.943532770430811e-06, + "loss": 0.2754969894886017, + "step": 2380 + }, + { + "epoch": 1.1764487828988015, + "grad_norm": 1.2424832323819373, 
+ "learning_rate": 7.93553750150202e-06, + "loss": 0.2734825909137726, + "step": 2381 + }, + { + "epoch": 1.1769430371926357, + "grad_norm": 1.3311172796502668, + "learning_rate": 7.927543610778895e-06, + "loss": 0.2803332209587097, + "step": 2382 + }, + { + "epoch": 1.1774372914864697, + "grad_norm": 1.3572589379934268, + "learning_rate": 7.919551103598037e-06, + "loss": 0.2820316255092621, + "step": 2383 + }, + { + "epoch": 1.177931545780304, + "grad_norm": 1.1984541262238777, + "learning_rate": 7.911559985295142e-06, + "loss": 0.26788315176963806, + "step": 2384 + }, + { + "epoch": 1.178425800074138, + "grad_norm": 1.152974420484647, + "learning_rate": 7.90357026120496e-06, + "loss": 0.2562825083732605, + "step": 2385 + }, + { + "epoch": 1.1789200543679723, + "grad_norm": 1.3733272776027918, + "learning_rate": 7.895581936661316e-06, + "loss": 0.28260675072669983, + "step": 2386 + }, + { + "epoch": 1.1794143086618065, + "grad_norm": 1.2509507258139472, + "learning_rate": 7.887595016997105e-06, + "loss": 0.25887200236320496, + "step": 2387 + }, + { + "epoch": 1.1799085629556407, + "grad_norm": 1.1852436756934879, + "learning_rate": 7.879609507544274e-06, + "loss": 0.2351648062467575, + "step": 2388 + }, + { + "epoch": 1.180402817249475, + "grad_norm": 1.310528017980178, + "learning_rate": 7.871625413633843e-06, + "loss": 0.2958889305591583, + "step": 2389 + }, + { + "epoch": 1.180897071543309, + "grad_norm": 1.260660594043313, + "learning_rate": 7.863642740595873e-06, + "loss": 0.29704710841178894, + "step": 2390 + }, + { + "epoch": 1.1813913258371431, + "grad_norm": 1.1273593973839822, + "learning_rate": 7.855661493759488e-06, + "loss": 0.23283210396766663, + "step": 2391 + }, + { + "epoch": 1.1818855801309773, + "grad_norm": 1.1497387573049556, + "learning_rate": 7.847681678452846e-06, + "loss": 0.22818870842456818, + "step": 2392 + }, + { + "epoch": 1.1823798344248115, + "grad_norm": 1.2334848445567106, + "learning_rate": 7.839703300003163e-06, + "loss": 
0.2345077246427536, + "step": 2393 + }, + { + "epoch": 1.1828740887186457, + "grad_norm": 1.3979127898652413, + "learning_rate": 7.831726363736694e-06, + "loss": 0.31161409616470337, + "step": 2394 + }, + { + "epoch": 1.18336834301248, + "grad_norm": 1.3157666615230723, + "learning_rate": 7.823750874978724e-06, + "loss": 0.2958439588546753, + "step": 2395 + }, + { + "epoch": 1.1838625973063142, + "grad_norm": 1.1914805532137183, + "learning_rate": 7.815776839053568e-06, + "loss": 0.24895446002483368, + "step": 2396 + }, + { + "epoch": 1.1843568516001484, + "grad_norm": 1.189611866561264, + "learning_rate": 7.807804261284591e-06, + "loss": 0.2691795825958252, + "step": 2397 + }, + { + "epoch": 1.1848511058939826, + "grad_norm": 1.2282823509277643, + "learning_rate": 7.799833146994165e-06, + "loss": 0.26797783374786377, + "step": 2398 + }, + { + "epoch": 1.1853453601878166, + "grad_norm": 1.2297499766268158, + "learning_rate": 7.791863501503694e-06, + "loss": 0.2665610611438751, + "step": 2399 + }, + { + "epoch": 1.1858396144816508, + "grad_norm": 1.1290863581864232, + "learning_rate": 7.783895330133596e-06, + "loss": 0.24712792038917542, + "step": 2400 + }, + { + "epoch": 1.186333868775485, + "grad_norm": 1.2300895404986125, + "learning_rate": 7.775928638203316e-06, + "loss": 0.24131645262241364, + "step": 2401 + }, + { + "epoch": 1.1868281230693192, + "grad_norm": 1.2566198414342145, + "learning_rate": 7.7679634310313e-06, + "loss": 0.24233923852443695, + "step": 2402 + }, + { + "epoch": 1.1873223773631534, + "grad_norm": 1.2397915401139883, + "learning_rate": 7.759999713935002e-06, + "loss": 0.24929150938987732, + "step": 2403 + }, + { + "epoch": 1.1878166316569876, + "grad_norm": 1.2005274695814647, + "learning_rate": 7.752037492230887e-06, + "loss": 0.266767293214798, + "step": 2404 + }, + { + "epoch": 1.1883108859508218, + "grad_norm": 1.2083997342227277, + "learning_rate": 7.744076771234427e-06, + "loss": 0.257263720035553, + "step": 2405 + }, + { + "epoch": 
1.1888051402446558, + "grad_norm": 1.3017758985808945, + "learning_rate": 7.73611755626008e-06, + "loss": 0.26949891448020935, + "step": 2406 + }, + { + "epoch": 1.18929939453849, + "grad_norm": 1.3523825920294412, + "learning_rate": 7.728159852621308e-06, + "loss": 0.250274121761322, + "step": 2407 + }, + { + "epoch": 1.1897936488323242, + "grad_norm": 1.6370645689880403, + "learning_rate": 7.720203665630553e-06, + "loss": 0.2442864030599594, + "step": 2408 + }, + { + "epoch": 1.1902879031261584, + "grad_norm": 1.4258170868908235, + "learning_rate": 7.71224900059926e-06, + "loss": 0.273416131734848, + "step": 2409 + }, + { + "epoch": 1.1907821574199926, + "grad_norm": 1.2547538223250059, + "learning_rate": 7.704295862837845e-06, + "loss": 0.2559645175933838, + "step": 2410 + }, + { + "epoch": 1.1912764117138268, + "grad_norm": 1.3439078919148493, + "learning_rate": 7.696344257655713e-06, + "loss": 0.2793371379375458, + "step": 2411 + }, + { + "epoch": 1.191770666007661, + "grad_norm": 1.1661216324600743, + "learning_rate": 7.688394190361235e-06, + "loss": 0.23739437758922577, + "step": 2412 + }, + { + "epoch": 1.1922649203014952, + "grad_norm": 1.293132062594429, + "learning_rate": 7.680445666261766e-06, + "loss": 0.27027466893196106, + "step": 2413 + }, + { + "epoch": 1.1927591745953292, + "grad_norm": 1.2887121644516222, + "learning_rate": 7.672498690663632e-06, + "loss": 0.2641778886318207, + "step": 2414 + }, + { + "epoch": 1.1932534288891634, + "grad_norm": 1.235898023301149, + "learning_rate": 7.664553268872116e-06, + "loss": 0.25086820125579834, + "step": 2415 + }, + { + "epoch": 1.1937476831829976, + "grad_norm": 1.6761712741491541, + "learning_rate": 7.656609406191467e-06, + "loss": 0.2871254086494446, + "step": 2416 + }, + { + "epoch": 1.1942419374768318, + "grad_norm": 1.193500770631568, + "learning_rate": 7.648667107924893e-06, + "loss": 0.2657528221607208, + "step": 2417 + }, + { + "epoch": 1.194736191770666, + "grad_norm": 1.3739698225148846, + 
"learning_rate": 7.640726379374564e-06, + "loss": 0.26942694187164307, + "step": 2418 + }, + { + "epoch": 1.1952304460645002, + "grad_norm": 1.1561137180130854, + "learning_rate": 7.632787225841593e-06, + "loss": 0.23883840441703796, + "step": 2419 + }, + { + "epoch": 1.1957247003583344, + "grad_norm": 1.215726770348901, + "learning_rate": 7.624849652626049e-06, + "loss": 0.24837304651737213, + "step": 2420 + }, + { + "epoch": 1.1962189546521684, + "grad_norm": 1.194954932679119, + "learning_rate": 7.616913665026936e-06, + "loss": 0.2882450222969055, + "step": 2421 + }, + { + "epoch": 1.1967132089460026, + "grad_norm": 1.4557191034476904, + "learning_rate": 7.608979268342213e-06, + "loss": 0.25877460837364197, + "step": 2422 + }, + { + "epoch": 1.1972074632398368, + "grad_norm": 1.2343724838571453, + "learning_rate": 7.601046467868767e-06, + "loss": 0.26970750093460083, + "step": 2423 + }, + { + "epoch": 1.197701717533671, + "grad_norm": 1.1598747816375319, + "learning_rate": 7.593115268902423e-06, + "loss": 0.23771706223487854, + "step": 2424 + }, + { + "epoch": 1.1981959718275053, + "grad_norm": 1.1949187968831856, + "learning_rate": 7.585185676737932e-06, + "loss": 0.25420787930488586, + "step": 2425 + }, + { + "epoch": 1.1986902261213395, + "grad_norm": 1.248194263596005, + "learning_rate": 7.577257696668982e-06, + "loss": 0.2551025152206421, + "step": 2426 + }, + { + "epoch": 1.1991844804151737, + "grad_norm": 1.1913659485965633, + "learning_rate": 7.569331333988177e-06, + "loss": 0.2302972972393036, + "step": 2427 + }, + { + "epoch": 1.1996787347090079, + "grad_norm": 1.340176223566515, + "learning_rate": 7.561406593987045e-06, + "loss": 0.25811445713043213, + "step": 2428 + }, + { + "epoch": 1.200172989002842, + "grad_norm": 1.1946803554276415, + "learning_rate": 7.5534834819560235e-06, + "loss": 0.2550782561302185, + "step": 2429 + }, + { + "epoch": 1.200667243296676, + "grad_norm": 1.3588122473637638, + "learning_rate": 7.545562003184474e-06, + "loss": 
0.24825535714626312, + "step": 2430 + }, + { + "epoch": 1.2011614975905103, + "grad_norm": 1.3105140055807547, + "learning_rate": 7.537642162960664e-06, + "loss": 0.29703712463378906, + "step": 2431 + }, + { + "epoch": 1.2016557518843445, + "grad_norm": 1.2707072551305245, + "learning_rate": 7.5297239665717625e-06, + "loss": 0.26830747723579407, + "step": 2432 + }, + { + "epoch": 1.2021500061781787, + "grad_norm": 1.2272388404108225, + "learning_rate": 7.521807419303846e-06, + "loss": 0.2428341656923294, + "step": 2433 + }, + { + "epoch": 1.202644260472013, + "grad_norm": 1.3310573803274635, + "learning_rate": 7.513892526441883e-06, + "loss": 0.2843051552772522, + "step": 2434 + }, + { + "epoch": 1.203138514765847, + "grad_norm": 1.297091941411815, + "learning_rate": 7.50597929326975e-06, + "loss": 0.2485228031873703, + "step": 2435 + }, + { + "epoch": 1.203632769059681, + "grad_norm": 1.3716686006321661, + "learning_rate": 7.498067725070206e-06, + "loss": 0.25343626737594604, + "step": 2436 + }, + { + "epoch": 1.2041270233535153, + "grad_norm": 1.3197919626781558, + "learning_rate": 7.490157827124902e-06, + "loss": 0.24906575679779053, + "step": 2437 + }, + { + "epoch": 1.2046212776473495, + "grad_norm": 1.6398204697926184, + "learning_rate": 7.4822496047143665e-06, + "loss": 0.33576443791389465, + "step": 2438 + }, + { + "epoch": 1.2051155319411837, + "grad_norm": 1.341601959864184, + "learning_rate": 7.474343063118023e-06, + "loss": 0.2755683362483978, + "step": 2439 + }, + { + "epoch": 1.205609786235018, + "grad_norm": 1.259839098151577, + "learning_rate": 7.466438207614165e-06, + "loss": 0.2667745351791382, + "step": 2440 + }, + { + "epoch": 1.2061040405288521, + "grad_norm": 1.3942381323272646, + "learning_rate": 7.458535043479959e-06, + "loss": 0.2970271408557892, + "step": 2441 + }, + { + "epoch": 1.2065982948226863, + "grad_norm": 1.2934031608191798, + "learning_rate": 7.450633575991442e-06, + "loss": 0.2628048360347748, + "step": 2442 + }, + { + "epoch": 
1.2070925491165205, + "grad_norm": 1.3935428467061275, + "learning_rate": 7.442733810423526e-06, + "loss": 0.29923003911972046, + "step": 2443 + }, + { + "epoch": 1.2075868034103547, + "grad_norm": 1.2121764987473183, + "learning_rate": 7.4348357520499805e-06, + "loss": 0.2486419975757599, + "step": 2444 + }, + { + "epoch": 1.2080810577041887, + "grad_norm": 1.2651423288599317, + "learning_rate": 7.4269394061434315e-06, + "loss": 0.2711118459701538, + "step": 2445 + }, + { + "epoch": 1.208575311998023, + "grad_norm": 1.2689988235231109, + "learning_rate": 7.419044777975371e-06, + "loss": 0.2568815052509308, + "step": 2446 + }, + { + "epoch": 1.2090695662918571, + "grad_norm": 1.3357220203112758, + "learning_rate": 7.411151872816143e-06, + "loss": 0.2546462416648865, + "step": 2447 + }, + { + "epoch": 1.2095638205856913, + "grad_norm": 1.1716595202066384, + "learning_rate": 7.403260695934933e-06, + "loss": 0.23455393314361572, + "step": 2448 + }, + { + "epoch": 1.2100580748795255, + "grad_norm": 1.3263077198790523, + "learning_rate": 7.395371252599779e-06, + "loss": 0.2874235510826111, + "step": 2449 + }, + { + "epoch": 1.2105523291733598, + "grad_norm": 1.2319732877340805, + "learning_rate": 7.387483548077559e-06, + "loss": 0.2462289184331894, + "step": 2450 + }, + { + "epoch": 1.211046583467194, + "grad_norm": 1.381045021384348, + "learning_rate": 7.379597587633998e-06, + "loss": 0.29385364055633545, + "step": 2451 + }, + { + "epoch": 1.211540837761028, + "grad_norm": 1.1902133906710186, + "learning_rate": 7.371713376533642e-06, + "loss": 0.25049760937690735, + "step": 2452 + }, + { + "epoch": 1.2120350920548622, + "grad_norm": 1.267298470174844, + "learning_rate": 7.363830920039887e-06, + "loss": 0.2748974859714508, + "step": 2453 + }, + { + "epoch": 1.2125293463486964, + "grad_norm": 1.2929931198793703, + "learning_rate": 7.355950223414939e-06, + "loss": 0.2707570791244507, + "step": 2454 + }, + { + "epoch": 1.2130236006425306, + "grad_norm": 1.3328464163268134, 
+ "learning_rate": 7.3480712919198474e-06, + "loss": 0.2864024043083191, + "step": 2455 + }, + { + "epoch": 1.2135178549363648, + "grad_norm": 1.400259353784304, + "learning_rate": 7.340194130814466e-06, + "loss": 0.3181900680065155, + "step": 2456 + }, + { + "epoch": 1.214012109230199, + "grad_norm": 1.2994892273470056, + "learning_rate": 7.332318745357483e-06, + "loss": 0.3022974729537964, + "step": 2457 + }, + { + "epoch": 1.2145063635240332, + "grad_norm": 1.2350650698265369, + "learning_rate": 7.324445140806387e-06, + "loss": 0.2850461006164551, + "step": 2458 + }, + { + "epoch": 1.2150006178178674, + "grad_norm": 1.0534315857750147, + "learning_rate": 7.316573322417483e-06, + "loss": 0.21958643198013306, + "step": 2459 + }, + { + "epoch": 1.2154948721117014, + "grad_norm": 1.3531472648001939, + "learning_rate": 7.3087032954458915e-06, + "loss": 0.2517468333244324, + "step": 2460 + }, + { + "epoch": 1.2159891264055356, + "grad_norm": 1.1714370722498957, + "learning_rate": 7.300835065145526e-06, + "loss": 0.26957637071609497, + "step": 2461 + }, + { + "epoch": 1.2164833806993698, + "grad_norm": 1.2755586367674554, + "learning_rate": 7.292968636769103e-06, + "loss": 0.2699058949947357, + "step": 2462 + }, + { + "epoch": 1.216977634993204, + "grad_norm": 1.2382912705778586, + "learning_rate": 7.285104015568138e-06, + "loss": 0.25076431035995483, + "step": 2463 + }, + { + "epoch": 1.2174718892870382, + "grad_norm": 1.2104527847150177, + "learning_rate": 7.277241206792944e-06, + "loss": 0.24862724542617798, + "step": 2464 + }, + { + "epoch": 1.2179661435808724, + "grad_norm": 1.3107261919810722, + "learning_rate": 7.269380215692614e-06, + "loss": 0.27427712082862854, + "step": 2465 + }, + { + "epoch": 1.2184603978747066, + "grad_norm": 1.2946586839730188, + "learning_rate": 7.261521047515041e-06, + "loss": 0.24343061447143555, + "step": 2466 + }, + { + "epoch": 1.2189546521685406, + "grad_norm": 1.1968860231182823, + "learning_rate": 7.253663707506882e-06, + 
"loss": 0.25482866168022156, + "step": 2467 + }, + { + "epoch": 1.2194489064623748, + "grad_norm": 1.2806570256332481, + "learning_rate": 7.2458082009135964e-06, + "loss": 0.27699458599090576, + "step": 2468 + }, + { + "epoch": 1.219943160756209, + "grad_norm": 1.3000686730507884, + "learning_rate": 7.237954532979401e-06, + "loss": 0.26576149463653564, + "step": 2469 + }, + { + "epoch": 1.2204374150500432, + "grad_norm": 1.2984838025251157, + "learning_rate": 7.230102708947298e-06, + "loss": 0.287861168384552, + "step": 2470 + }, + { + "epoch": 1.2209316693438774, + "grad_norm": 1.2911534198412806, + "learning_rate": 7.2222527340590434e-06, + "loss": 0.25484874844551086, + "step": 2471 + }, + { + "epoch": 1.2214259236377116, + "grad_norm": 1.284847349415858, + "learning_rate": 7.214404613555177e-06, + "loss": 0.26371529698371887, + "step": 2472 + }, + { + "epoch": 1.2219201779315458, + "grad_norm": 1.334957534550205, + "learning_rate": 7.206558352674992e-06, + "loss": 0.23692578077316284, + "step": 2473 + }, + { + "epoch": 1.22241443222538, + "grad_norm": 1.2696744902236006, + "learning_rate": 7.198713956656538e-06, + "loss": 0.26369085907936096, + "step": 2474 + }, + { + "epoch": 1.2229086865192142, + "grad_norm": 1.4374683516439322, + "learning_rate": 7.1908714307366145e-06, + "loss": 0.260580450296402, + "step": 2475 + }, + { + "epoch": 1.2234029408130482, + "grad_norm": 1.280804641850837, + "learning_rate": 7.1830307801507904e-06, + "loss": 0.2693007290363312, + "step": 2476 + }, + { + "epoch": 1.2238971951068824, + "grad_norm": 1.3429546136121409, + "learning_rate": 7.1751920101333695e-06, + "loss": 0.26629775762557983, + "step": 2477 + }, + { + "epoch": 1.2243914494007166, + "grad_norm": 1.3999841706301799, + "learning_rate": 7.167355125917399e-06, + "loss": 0.2963234782218933, + "step": 2478 + }, + { + "epoch": 1.2248857036945509, + "grad_norm": 1.2332551275962955, + "learning_rate": 7.159520132734669e-06, + "loss": 0.24415187537670135, + "step": 2479 + }, + 
{ + "epoch": 1.225379957988385, + "grad_norm": 1.3645078601677985, + "learning_rate": 7.15168703581572e-06, + "loss": 0.2941599190235138, + "step": 2480 + }, + { + "epoch": 1.2258742122822193, + "grad_norm": 1.2551885597461083, + "learning_rate": 7.1438558403898065e-06, + "loss": 0.22807514667510986, + "step": 2481 + }, + { + "epoch": 1.2263684665760535, + "grad_norm": 1.3774209397395383, + "learning_rate": 7.136026551684923e-06, + "loss": 0.28865426778793335, + "step": 2482 + }, + { + "epoch": 1.2268627208698875, + "grad_norm": 1.3250195381886638, + "learning_rate": 7.1281991749277945e-06, + "loss": 0.3015780448913574, + "step": 2483 + }, + { + "epoch": 1.2273569751637217, + "grad_norm": 1.30264219696165, + "learning_rate": 7.12037371534386e-06, + "loss": 0.2521517872810364, + "step": 2484 + }, + { + "epoch": 1.2278512294575559, + "grad_norm": 1.520486974517902, + "learning_rate": 7.1125501781572896e-06, + "loss": 0.2904277443885803, + "step": 2485 + }, + { + "epoch": 1.22834548375139, + "grad_norm": 1.2434155494713983, + "learning_rate": 7.104728568590966e-06, + "loss": 0.26172375679016113, + "step": 2486 + }, + { + "epoch": 1.2288397380452243, + "grad_norm": 1.3588693705399504, + "learning_rate": 7.096908891866483e-06, + "loss": 0.23565448820590973, + "step": 2487 + }, + { + "epoch": 1.2293339923390585, + "grad_norm": 1.276833588621656, + "learning_rate": 7.0890911532041375e-06, + "loss": 0.2550106644630432, + "step": 2488 + }, + { + "epoch": 1.2298282466328927, + "grad_norm": 1.4167484141197517, + "learning_rate": 7.08127535782295e-06, + "loss": 0.3221823573112488, + "step": 2489 + }, + { + "epoch": 1.230322500926727, + "grad_norm": 1.2657124525427264, + "learning_rate": 7.073461510940631e-06, + "loss": 0.26209163665771484, + "step": 2490 + }, + { + "epoch": 1.2308167552205609, + "grad_norm": 1.3626305998908985, + "learning_rate": 7.06564961777359e-06, + "loss": 0.28635868430137634, + "step": 2491 + }, + { + "epoch": 1.231311009514395, + "grad_norm": 
1.417027138446056, + "learning_rate": 7.0578396835369355e-06, + "loss": 0.25630202889442444, + "step": 2492 + }, + { + "epoch": 1.2318052638082293, + "grad_norm": 1.233621488661494, + "learning_rate": 7.050031713444474e-06, + "loss": 0.27345454692840576, + "step": 2493 + }, + { + "epoch": 1.2322995181020635, + "grad_norm": 1.2592068756906736, + "learning_rate": 7.042225712708692e-06, + "loss": 0.2365841269493103, + "step": 2494 + }, + { + "epoch": 1.2327937723958977, + "grad_norm": 1.730933189967813, + "learning_rate": 7.03442168654076e-06, + "loss": 0.2891104221343994, + "step": 2495 + }, + { + "epoch": 1.233288026689732, + "grad_norm": 1.3811266669598459, + "learning_rate": 7.026619640150534e-06, + "loss": 0.2713435888290405, + "step": 2496 + }, + { + "epoch": 1.2337822809835661, + "grad_norm": 1.3509192768016722, + "learning_rate": 7.018819578746557e-06, + "loss": 0.28552842140197754, + "step": 2497 + }, + { + "epoch": 1.2342765352774, + "grad_norm": 1.377186562637688, + "learning_rate": 7.011021507536031e-06, + "loss": 0.2731080949306488, + "step": 2498 + }, + { + "epoch": 1.2347707895712343, + "grad_norm": 1.1800591795719682, + "learning_rate": 7.003225431724841e-06, + "loss": 0.27373206615448, + "step": 2499 + }, + { + "epoch": 1.2352650438650685, + "grad_norm": 1.3197536250384188, + "learning_rate": 6.99543135651753e-06, + "loss": 0.24507245421409607, + "step": 2500 + }, + { + "epoch": 1.2357592981589027, + "grad_norm": 1.2680812543691635, + "learning_rate": 6.9876392871173205e-06, + "loss": 0.2653801739215851, + "step": 2501 + }, + { + "epoch": 1.236253552452737, + "grad_norm": 1.115227060544212, + "learning_rate": 6.979849228726079e-06, + "loss": 0.1929643303155899, + "step": 2502 + }, + { + "epoch": 1.2367478067465711, + "grad_norm": 1.330653204132735, + "learning_rate": 6.972061186544341e-06, + "loss": 0.2684918940067291, + "step": 2503 + }, + { + "epoch": 1.2372420610404053, + "grad_norm": 1.2129572179563677, + "learning_rate": 6.964275165771288e-06, + 
"loss": 0.23158729076385498, + "step": 2504 + }, + { + "epoch": 1.2377363153342396, + "grad_norm": 1.3192284190451669, + "learning_rate": 6.95649117160476e-06, + "loss": 0.24757611751556396, + "step": 2505 + }, + { + "epoch": 1.2382305696280738, + "grad_norm": 1.328208985585749, + "learning_rate": 6.9487092092412425e-06, + "loss": 0.2651844620704651, + "step": 2506 + }, + { + "epoch": 1.2387248239219077, + "grad_norm": 1.3550284074069674, + "learning_rate": 6.940929283875859e-06, + "loss": 0.26745620369911194, + "step": 2507 + }, + { + "epoch": 1.239219078215742, + "grad_norm": 1.2361002758783033, + "learning_rate": 6.933151400702374e-06, + "loss": 0.22088846564292908, + "step": 2508 + }, + { + "epoch": 1.2397133325095762, + "grad_norm": 1.2379679284464757, + "learning_rate": 6.925375564913193e-06, + "loss": 0.2662886381149292, + "step": 2509 + }, + { + "epoch": 1.2402075868034104, + "grad_norm": 1.3634625495618726, + "learning_rate": 6.917601781699357e-06, + "loss": 0.2691834270954132, + "step": 2510 + }, + { + "epoch": 1.2407018410972446, + "grad_norm": 1.1575744185130052, + "learning_rate": 6.909830056250527e-06, + "loss": 0.2110689878463745, + "step": 2511 + }, + { + "epoch": 1.2411960953910788, + "grad_norm": 1.2961548823459923, + "learning_rate": 6.902060393755001e-06, + "loss": 0.29281991720199585, + "step": 2512 + }, + { + "epoch": 1.2416903496849128, + "grad_norm": 1.2724295845366205, + "learning_rate": 6.894292799399688e-06, + "loss": 0.27409040927886963, + "step": 2513 + }, + { + "epoch": 1.242184603978747, + "grad_norm": 1.304980332058365, + "learning_rate": 6.886527278370131e-06, + "loss": 0.29440224170684814, + "step": 2514 + }, + { + "epoch": 1.2426788582725812, + "grad_norm": 1.1224782958445216, + "learning_rate": 6.878763835850475e-06, + "loss": 0.23107948899269104, + "step": 2515 + }, + { + "epoch": 1.2431731125664154, + "grad_norm": 1.55997556893969, + "learning_rate": 6.871002477023488e-06, + "loss": 0.2682652473449707, + "step": 2516 + }, + { + 
"epoch": 1.2436673668602496, + "grad_norm": 1.2329698948831815, + "learning_rate": 6.863243207070534e-06, + "loss": 0.2935982644557953, + "step": 2517 + }, + { + "epoch": 1.2441616211540838, + "grad_norm": 1.4373018605291157, + "learning_rate": 6.855486031171597e-06, + "loss": 0.29027625918388367, + "step": 2518 + }, + { + "epoch": 1.244655875447918, + "grad_norm": 1.2739101669235458, + "learning_rate": 6.84773095450526e-06, + "loss": 0.25107353925704956, + "step": 2519 + }, + { + "epoch": 1.2451501297417522, + "grad_norm": 1.2325888755211254, + "learning_rate": 6.839977982248697e-06, + "loss": 0.279231995344162, + "step": 2520 + }, + { + "epoch": 1.2456443840355864, + "grad_norm": 1.2006221660421637, + "learning_rate": 6.832227119577677e-06, + "loss": 0.2544802129268646, + "step": 2521 + }, + { + "epoch": 1.2461386383294204, + "grad_norm": 1.397981415575177, + "learning_rate": 6.824478371666573e-06, + "loss": 0.24365633726119995, + "step": 2522 + }, + { + "epoch": 1.2466328926232546, + "grad_norm": 1.1393524200353975, + "learning_rate": 6.816731743688336e-06, + "loss": 0.2673290967941284, + "step": 2523 + }, + { + "epoch": 1.2471271469170888, + "grad_norm": 1.284093438519867, + "learning_rate": 6.808987240814504e-06, + "loss": 0.23896455764770508, + "step": 2524 + }, + { + "epoch": 1.247621401210923, + "grad_norm": 1.200000168994301, + "learning_rate": 6.801244868215192e-06, + "loss": 0.23196406662464142, + "step": 2525 + }, + { + "epoch": 1.2481156555047572, + "grad_norm": 1.2289321548733863, + "learning_rate": 6.793504631059106e-06, + "loss": 0.24249708652496338, + "step": 2526 + }, + { + "epoch": 1.2486099097985914, + "grad_norm": 1.1511217069627229, + "learning_rate": 6.785766534513514e-06, + "loss": 0.2366780787706375, + "step": 2527 + }, + { + "epoch": 1.2491041640924256, + "grad_norm": 1.291146988373714, + "learning_rate": 6.778030583744254e-06, + "loss": 0.2615105211734772, + "step": 2528 + }, + { + "epoch": 1.2495984183862596, + "grad_norm": 
1.4688230831159943, + "learning_rate": 6.770296783915738e-06, + "loss": 0.29761314392089844, + "step": 2529 + }, + { + "epoch": 1.2500926726800938, + "grad_norm": 1.2928438568936322, + "learning_rate": 6.762565140190948e-06, + "loss": 0.25020867586135864, + "step": 2530 + }, + { + "epoch": 1.250586926973928, + "grad_norm": 1.3858962507108388, + "learning_rate": 6.754835657731409e-06, + "loss": 0.2716590166091919, + "step": 2531 + }, + { + "epoch": 1.2510811812677622, + "grad_norm": 1.4048062063243787, + "learning_rate": 6.747108341697221e-06, + "loss": 0.27042001485824585, + "step": 2532 + }, + { + "epoch": 1.2515754355615964, + "grad_norm": 1.3297085932201778, + "learning_rate": 6.739383197247023e-06, + "loss": 0.2659035325050354, + "step": 2533 + }, + { + "epoch": 1.2520696898554307, + "grad_norm": 1.3945414928963702, + "learning_rate": 6.731660229538014e-06, + "loss": 0.2803581655025482, + "step": 2534 + }, + { + "epoch": 1.2525639441492649, + "grad_norm": 1.1484885760506975, + "learning_rate": 6.723939443725938e-06, + "loss": 0.24422097206115723, + "step": 2535 + }, + { + "epoch": 1.253058198443099, + "grad_norm": 1.5676789145324774, + "learning_rate": 6.71622084496508e-06, + "loss": 0.30003631114959717, + "step": 2536 + }, + { + "epoch": 1.2535524527369333, + "grad_norm": 1.3207189074013763, + "learning_rate": 6.708504438408265e-06, + "loss": 0.25745317339897156, + "step": 2537 + }, + { + "epoch": 1.2540467070307673, + "grad_norm": 1.3298790802481242, + "learning_rate": 6.700790229206856e-06, + "loss": 0.27648618817329407, + "step": 2538 + }, + { + "epoch": 1.2545409613246015, + "grad_norm": 1.2910375745243117, + "learning_rate": 6.6930782225107536e-06, + "loss": 0.2579975724220276, + "step": 2539 + }, + { + "epoch": 1.2550352156184357, + "grad_norm": 1.3321333943034437, + "learning_rate": 6.68536842346838e-06, + "loss": 0.2806825637817383, + "step": 2540 + }, + { + "epoch": 1.2555294699122699, + "grad_norm": 1.5211080365897773, + "learning_rate": 
6.677660837226685e-06, + "loss": 0.2641657888889313, + "step": 2541 + }, + { + "epoch": 1.256023724206104, + "grad_norm": 1.3170844434659201, + "learning_rate": 6.669955468931142e-06, + "loss": 0.25483542680740356, + "step": 2542 + }, + { + "epoch": 1.2565179784999383, + "grad_norm": 1.438596032878092, + "learning_rate": 6.662252323725751e-06, + "loss": 0.264334112405777, + "step": 2543 + }, + { + "epoch": 1.2570122327937723, + "grad_norm": 1.2825942587632855, + "learning_rate": 6.654551406753017e-06, + "loss": 0.2541567385196686, + "step": 2544 + }, + { + "epoch": 1.2575064870876065, + "grad_norm": 1.3007868833040497, + "learning_rate": 6.646852723153965e-06, + "loss": 0.2695424258708954, + "step": 2545 + }, + { + "epoch": 1.2580007413814407, + "grad_norm": 1.2114763710946868, + "learning_rate": 6.63915627806812e-06, + "loss": 0.2694344222545624, + "step": 2546 + }, + { + "epoch": 1.258494995675275, + "grad_norm": 1.3203626104751756, + "learning_rate": 6.631462076633527e-06, + "loss": 0.2695961892604828, + "step": 2547 + }, + { + "epoch": 1.258989249969109, + "grad_norm": 1.43655166025842, + "learning_rate": 6.623770123986719e-06, + "loss": 0.26878753304481506, + "step": 2548 + }, + { + "epoch": 1.2594835042629433, + "grad_norm": 1.4117532208090406, + "learning_rate": 6.616080425262738e-06, + "loss": 0.27568501234054565, + "step": 2549 + }, + { + "epoch": 1.2599777585567775, + "grad_norm": 1.4407785281346286, + "learning_rate": 6.608392985595111e-06, + "loss": 0.2991989254951477, + "step": 2550 + }, + { + "epoch": 1.2604720128506117, + "grad_norm": 1.2938769852574108, + "learning_rate": 6.600707810115869e-06, + "loss": 0.21832239627838135, + "step": 2551 + }, + { + "epoch": 1.260966267144446, + "grad_norm": 1.3528768023288296, + "learning_rate": 6.593024903955525e-06, + "loss": 0.2671685516834259, + "step": 2552 + }, + { + "epoch": 1.26146052143828, + "grad_norm": 1.142061359022944, + "learning_rate": 6.585344272243073e-06, + "loss": 0.23399557173252106, + "step": 
2553 + }, + { + "epoch": 1.2619547757321141, + "grad_norm": 1.3000899404630435, + "learning_rate": 6.577665920105996e-06, + "loss": 0.2701990008354187, + "step": 2554 + }, + { + "epoch": 1.2624490300259483, + "grad_norm": 1.216581780326655, + "learning_rate": 6.56998985267025e-06, + "loss": 0.2679189145565033, + "step": 2555 + }, + { + "epoch": 1.2629432843197825, + "grad_norm": 1.3457541131318878, + "learning_rate": 6.562316075060272e-06, + "loss": 0.2597065567970276, + "step": 2556 + }, + { + "epoch": 1.2634375386136167, + "grad_norm": 1.3732680167208262, + "learning_rate": 6.554644592398962e-06, + "loss": 0.2942010462284088, + "step": 2557 + }, + { + "epoch": 1.263931792907451, + "grad_norm": 1.2654921757837638, + "learning_rate": 6.546975409807696e-06, + "loss": 0.2547098994255066, + "step": 2558 + }, + { + "epoch": 1.264426047201285, + "grad_norm": 1.29416806058113, + "learning_rate": 6.539308532406306e-06, + "loss": 0.2779114246368408, + "step": 2559 + }, + { + "epoch": 1.2649203014951191, + "grad_norm": 1.2525651200835928, + "learning_rate": 6.531643965313093e-06, + "loss": 0.22318917512893677, + "step": 2560 + }, + { + "epoch": 1.2654145557889533, + "grad_norm": 1.2931765026229116, + "learning_rate": 6.523981713644814e-06, + "loss": 0.25439128279685974, + "step": 2561 + }, + { + "epoch": 1.2659088100827876, + "grad_norm": 1.1946536852540512, + "learning_rate": 6.516321782516677e-06, + "loss": 0.2317974865436554, + "step": 2562 + }, + { + "epoch": 1.2664030643766218, + "grad_norm": 1.3517228291780166, + "learning_rate": 6.508664177042339e-06, + "loss": 0.273223876953125, + "step": 2563 + }, + { + "epoch": 1.266897318670456, + "grad_norm": 1.3767500694886763, + "learning_rate": 6.501008902333912e-06, + "loss": 0.28408509492874146, + "step": 2564 + }, + { + "epoch": 1.2673915729642902, + "grad_norm": 1.4378995512233899, + "learning_rate": 6.493355963501951e-06, + "loss": 0.2702238857746124, + "step": 2565 + }, + { + "epoch": 1.2678858272581244, + "grad_norm": 
1.2819637354130675, + "learning_rate": 6.485705365655441e-06, + "loss": 0.2142164558172226, + "step": 2566 + }, + { + "epoch": 1.2683800815519586, + "grad_norm": 1.4108385899794438, + "learning_rate": 6.478057113901817e-06, + "loss": 0.2654300928115845, + "step": 2567 + }, + { + "epoch": 1.2688743358457928, + "grad_norm": 1.1724627648861543, + "learning_rate": 6.470411213346941e-06, + "loss": 0.24601367115974426, + "step": 2568 + }, + { + "epoch": 1.2693685901396268, + "grad_norm": 1.36613316910106, + "learning_rate": 6.462767669095109e-06, + "loss": 0.26201942563056946, + "step": 2569 + }, + { + "epoch": 1.269862844433461, + "grad_norm": 1.342399065083916, + "learning_rate": 6.455126486249038e-06, + "loss": 0.2839587926864624, + "step": 2570 + }, + { + "epoch": 1.2703570987272952, + "grad_norm": 1.2538564056049797, + "learning_rate": 6.447487669909873e-06, + "loss": 0.21100708842277527, + "step": 2571 + }, + { + "epoch": 1.2708513530211294, + "grad_norm": 1.1457223195177177, + "learning_rate": 6.439851225177185e-06, + "loss": 0.2181582748889923, + "step": 2572 + }, + { + "epoch": 1.2713456073149636, + "grad_norm": 1.397761306307691, + "learning_rate": 6.432217157148948e-06, + "loss": 0.29196488857269287, + "step": 2573 + }, + { + "epoch": 1.2718398616087978, + "grad_norm": 1.3664440708479575, + "learning_rate": 6.424585470921563e-06, + "loss": 0.2365931123495102, + "step": 2574 + }, + { + "epoch": 1.2723341159026318, + "grad_norm": 1.3496940412150429, + "learning_rate": 6.4169561715898255e-06, + "loss": 0.2277393937110901, + "step": 2575 + }, + { + "epoch": 1.272828370196466, + "grad_norm": 1.3624051718280268, + "learning_rate": 6.409329264246956e-06, + "loss": 0.25285032391548157, + "step": 2576 + }, + { + "epoch": 1.2733226244903002, + "grad_norm": 1.2632390853508073, + "learning_rate": 6.401704753984563e-06, + "loss": 0.253650963306427, + "step": 2577 + }, + { + "epoch": 1.2738168787841344, + "grad_norm": 1.264245223392645, + "learning_rate": 
6.394082645892668e-06, + "loss": 0.22143784165382385, + "step": 2578 + }, + { + "epoch": 1.2743111330779686, + "grad_norm": 1.3283739907286298, + "learning_rate": 6.3864629450596696e-06, + "loss": 0.27591395378112793, + "step": 2579 + }, + { + "epoch": 1.2748053873718028, + "grad_norm": 1.6236594986793635, + "learning_rate": 6.37884565657238e-06, + "loss": 0.32865333557128906, + "step": 2580 + }, + { + "epoch": 1.275299641665637, + "grad_norm": 1.2172019661301716, + "learning_rate": 6.371230785515992e-06, + "loss": 0.2743702530860901, + "step": 2581 + }, + { + "epoch": 1.2757938959594712, + "grad_norm": 1.2586352823219396, + "learning_rate": 6.3636183369740845e-06, + "loss": 0.23967956006526947, + "step": 2582 + }, + { + "epoch": 1.2762881502533054, + "grad_norm": 1.206746025741565, + "learning_rate": 6.356008316028614e-06, + "loss": 0.2474803626537323, + "step": 2583 + }, + { + "epoch": 1.2767824045471394, + "grad_norm": 1.2591134604976273, + "learning_rate": 6.348400727759925e-06, + "loss": 0.2523267865180969, + "step": 2584 + }, + { + "epoch": 1.2772766588409736, + "grad_norm": 1.3690385191668641, + "learning_rate": 6.340795577246738e-06, + "loss": 0.2549436092376709, + "step": 2585 + }, + { + "epoch": 1.2777709131348078, + "grad_norm": 1.309885921175695, + "learning_rate": 6.333192869566138e-06, + "loss": 0.2602443993091583, + "step": 2586 + }, + { + "epoch": 1.278265167428642, + "grad_norm": 1.248955873440961, + "learning_rate": 6.325592609793588e-06, + "loss": 0.22912462055683136, + "step": 2587 + }, + { + "epoch": 1.2787594217224763, + "grad_norm": 1.3253843576578603, + "learning_rate": 6.317994803002907e-06, + "loss": 0.3004158139228821, + "step": 2588 + }, + { + "epoch": 1.2792536760163105, + "grad_norm": 1.2054603629919527, + "learning_rate": 6.310399454266289e-06, + "loss": 0.25851407647132874, + "step": 2589 + }, + { + "epoch": 1.2797479303101444, + "grad_norm": 1.2857681683589963, + "learning_rate": 6.302806568654277e-06, + "loss": 0.24637526273727417, 
+ "step": 2590 + }, + { + "epoch": 1.2802421846039787, + "grad_norm": 1.2976312908550238, + "learning_rate": 6.295216151235774e-06, + "loss": 0.26500213146209717, + "step": 2591 + }, + { + "epoch": 1.2807364388978129, + "grad_norm": 1.2103490895138174, + "learning_rate": 6.287628207078031e-06, + "loss": 0.24276241660118103, + "step": 2592 + }, + { + "epoch": 1.281230693191647, + "grad_norm": 2.3839558822188787, + "learning_rate": 6.280042741246655e-06, + "loss": 0.27117204666137695, + "step": 2593 + }, + { + "epoch": 1.2817249474854813, + "grad_norm": 1.4461368742366545, + "learning_rate": 6.272459758805596e-06, + "loss": 0.29287856817245483, + "step": 2594 + }, + { + "epoch": 1.2822192017793155, + "grad_norm": 1.4301387064569637, + "learning_rate": 6.26487926481714e-06, + "loss": 0.3065788149833679, + "step": 2595 + }, + { + "epoch": 1.2827134560731497, + "grad_norm": 1.3198078410588965, + "learning_rate": 6.257301264341915e-06, + "loss": 0.2738455533981323, + "step": 2596 + }, + { + "epoch": 1.283207710366984, + "grad_norm": 1.5398007848288653, + "learning_rate": 6.2497257624388915e-06, + "loss": 0.24216318130493164, + "step": 2597 + }, + { + "epoch": 1.283701964660818, + "grad_norm": 1.2565420891983292, + "learning_rate": 6.242152764165368e-06, + "loss": 0.276785671710968, + "step": 2598 + }, + { + "epoch": 1.2841962189546523, + "grad_norm": 1.2307015932000853, + "learning_rate": 6.234582274576961e-06, + "loss": 0.24999365210533142, + "step": 2599 + }, + { + "epoch": 1.2846904732484863, + "grad_norm": 1.2824145770644522, + "learning_rate": 6.227014298727627e-06, + "loss": 0.27714112401008606, + "step": 2600 + }, + { + "epoch": 1.2851847275423205, + "grad_norm": 1.2260344372038856, + "learning_rate": 6.219448841669639e-06, + "loss": 0.2422318160533905, + "step": 2601 + }, + { + "epoch": 1.2856789818361547, + "grad_norm": 1.3255802725159413, + "learning_rate": 6.21188590845359e-06, + "loss": 0.26688697934150696, + "step": 2602 + }, + { + "epoch": 
1.286173236129989, + "grad_norm": 1.2753676961687272, + "learning_rate": 6.204325504128379e-06, + "loss": 0.256889671087265, + "step": 2603 + }, + { + "epoch": 1.2866674904238231, + "grad_norm": 1.3013140965176258, + "learning_rate": 6.196767633741225e-06, + "loss": 0.27372461557388306, + "step": 2604 + }, + { + "epoch": 1.287161744717657, + "grad_norm": 1.3064762941978003, + "learning_rate": 6.189212302337663e-06, + "loss": 0.25194403529167175, + "step": 2605 + }, + { + "epoch": 1.2876559990114913, + "grad_norm": 1.2533511197404907, + "learning_rate": 6.181659514961515e-06, + "loss": 0.24381688237190247, + "step": 2606 + }, + { + "epoch": 1.2881502533053255, + "grad_norm": 1.2987400887924563, + "learning_rate": 6.17410927665492e-06, + "loss": 0.255805104970932, + "step": 2607 + }, + { + "epoch": 1.2886445075991597, + "grad_norm": 1.270289405479379, + "learning_rate": 6.166561592458307e-06, + "loss": 0.25070682168006897, + "step": 2608 + }, + { + "epoch": 1.289138761892994, + "grad_norm": 1.1954868388063873, + "learning_rate": 6.159016467410397e-06, + "loss": 0.24080060422420502, + "step": 2609 + }, + { + "epoch": 1.2896330161868281, + "grad_norm": 1.3524298235557053, + "learning_rate": 6.151473906548215e-06, + "loss": 0.28041762113571167, + "step": 2610 + }, + { + "epoch": 1.2901272704806623, + "grad_norm": 1.3891353799265191, + "learning_rate": 6.143933914907065e-06, + "loss": 0.2624273896217346, + "step": 2611 + }, + { + "epoch": 1.2906215247744965, + "grad_norm": 1.3838932352032651, + "learning_rate": 6.136396497520536e-06, + "loss": 0.2658112049102783, + "step": 2612 + }, + { + "epoch": 1.2911157790683307, + "grad_norm": 1.3103712430992434, + "learning_rate": 6.1288616594205e-06, + "loss": 0.27714237570762634, + "step": 2613 + }, + { + "epoch": 1.291610033362165, + "grad_norm": 1.2276105048536776, + "learning_rate": 6.121329405637111e-06, + "loss": 0.23253153264522552, + "step": 2614 + }, + { + "epoch": 1.292104287655999, + "grad_norm": 1.2168125400378236, + 
"learning_rate": 6.1137997411987915e-06, + "loss": 0.2438409924507141, + "step": 2615 + }, + { + "epoch": 1.2925985419498331, + "grad_norm": 1.3814066274151728, + "learning_rate": 6.106272671132236e-06, + "loss": 0.24013856053352356, + "step": 2616 + }, + { + "epoch": 1.2930927962436674, + "grad_norm": 1.4362282063831207, + "learning_rate": 6.098748200462408e-06, + "loss": 0.2850446403026581, + "step": 2617 + }, + { + "epoch": 1.2935870505375016, + "grad_norm": 1.3403873033762816, + "learning_rate": 6.0912263342125445e-06, + "loss": 0.22195187211036682, + "step": 2618 + }, + { + "epoch": 1.2940813048313358, + "grad_norm": 1.3701004376420556, + "learning_rate": 6.083707077404129e-06, + "loss": 0.29266390204429626, + "step": 2619 + }, + { + "epoch": 1.29457555912517, + "grad_norm": 1.2103981171479565, + "learning_rate": 6.076190435056913e-06, + "loss": 0.26741352677345276, + "step": 2620 + }, + { + "epoch": 1.295069813419004, + "grad_norm": 1.259544042020202, + "learning_rate": 6.068676412188892e-06, + "loss": 0.26014602184295654, + "step": 2621 + }, + { + "epoch": 1.2955640677128382, + "grad_norm": 1.2871395012144142, + "learning_rate": 6.061165013816333e-06, + "loss": 0.2561393976211548, + "step": 2622 + }, + { + "epoch": 1.2960583220066724, + "grad_norm": 1.312678751233067, + "learning_rate": 6.053656244953728e-06, + "loss": 0.2952851951122284, + "step": 2623 + }, + { + "epoch": 1.2965525763005066, + "grad_norm": 1.2817239432203538, + "learning_rate": 6.046150110613831e-06, + "loss": 0.2830423414707184, + "step": 2624 + }, + { + "epoch": 1.2970468305943408, + "grad_norm": 1.2514529269380406, + "learning_rate": 6.038646615807622e-06, + "loss": 0.22306497395038605, + "step": 2625 + }, + { + "epoch": 1.297541084888175, + "grad_norm": 1.3018072981213034, + "learning_rate": 6.031145765544333e-06, + "loss": 0.23291784524917603, + "step": 2626 + }, + { + "epoch": 1.2980353391820092, + "grad_norm": 1.3763927806121403, + "learning_rate": 6.023647564831425e-06, + "loss": 
0.2376563400030136, + "step": 2627 + }, + { + "epoch": 1.2985295934758434, + "grad_norm": 1.3283544756021872, + "learning_rate": 6.016152018674588e-06, + "loss": 0.2873516380786896, + "step": 2628 + }, + { + "epoch": 1.2990238477696776, + "grad_norm": 1.2475849952661122, + "learning_rate": 6.00865913207774e-06, + "loss": 0.2416999638080597, + "step": 2629 + }, + { + "epoch": 1.2995181020635118, + "grad_norm": 1.2254304075146119, + "learning_rate": 6.001168910043023e-06, + "loss": 0.2627726197242737, + "step": 2630 + }, + { + "epoch": 1.3000123563573458, + "grad_norm": 1.4025542210635493, + "learning_rate": 5.993681357570809e-06, + "loss": 0.25375279784202576, + "step": 2631 + }, + { + "epoch": 1.30050661065118, + "grad_norm": 1.3348797401747288, + "learning_rate": 5.986196479659676e-06, + "loss": 0.2853030562400818, + "step": 2632 + }, + { + "epoch": 1.3010008649450142, + "grad_norm": 1.3089867713489467, + "learning_rate": 5.978714281306425e-06, + "loss": 0.2626519501209259, + "step": 2633 + }, + { + "epoch": 1.3014951192388484, + "grad_norm": 1.4566011034207051, + "learning_rate": 5.971234767506057e-06, + "loss": 0.2895713448524475, + "step": 2634 + }, + { + "epoch": 1.3019893735326826, + "grad_norm": 1.2504104998957544, + "learning_rate": 5.9637579432518e-06, + "loss": 0.24617832899093628, + "step": 2635 + }, + { + "epoch": 1.3024836278265166, + "grad_norm": 1.2199824881911456, + "learning_rate": 5.956283813535066e-06, + "loss": 0.25497785210609436, + "step": 2636 + }, + { + "epoch": 1.3029778821203508, + "grad_norm": 1.3200409304272294, + "learning_rate": 5.948812383345484e-06, + "loss": 0.25832462310791016, + "step": 2637 + }, + { + "epoch": 1.303472136414185, + "grad_norm": 1.214232538768618, + "learning_rate": 5.941343657670866e-06, + "loss": 0.24273909628391266, + "step": 2638 + }, + { + "epoch": 1.3039663907080192, + "grad_norm": 1.2844572342866962, + "learning_rate": 5.933877641497232e-06, + "loss": 0.2668009400367737, + "step": 2639 + }, + { + "epoch": 
1.3044606450018534, + "grad_norm": 1.2388896928667246, + "learning_rate": 5.92641433980879e-06, + "loss": 0.2519373595714569, + "step": 2640 + }, + { + "epoch": 1.3049548992956876, + "grad_norm": 1.3760811135868023, + "learning_rate": 5.918953757587928e-06, + "loss": 0.30091768503189087, + "step": 2641 + }, + { + "epoch": 1.3054491535895218, + "grad_norm": 1.207587317973019, + "learning_rate": 5.911495899815225e-06, + "loss": 0.2504241466522217, + "step": 2642 + }, + { + "epoch": 1.305943407883356, + "grad_norm": 1.1902656490822856, + "learning_rate": 5.904040771469444e-06, + "loss": 0.24741190671920776, + "step": 2643 + }, + { + "epoch": 1.3064376621771903, + "grad_norm": 1.2559463008488698, + "learning_rate": 5.896588377527519e-06, + "loss": 0.2636350691318512, + "step": 2644 + }, + { + "epoch": 1.3069319164710245, + "grad_norm": 1.1981836589630794, + "learning_rate": 5.889138722964563e-06, + "loss": 0.22512421011924744, + "step": 2645 + }, + { + "epoch": 1.3074261707648585, + "grad_norm": 1.2451009493990417, + "learning_rate": 5.8816918127538546e-06, + "loss": 0.26447975635528564, + "step": 2646 + }, + { + "epoch": 1.3079204250586927, + "grad_norm": 1.1839899877527418, + "learning_rate": 5.874247651866853e-06, + "loss": 0.22084996104240417, + "step": 2647 + }, + { + "epoch": 1.3084146793525269, + "grad_norm": 1.2774730254159221, + "learning_rate": 5.8668062452731715e-06, + "loss": 0.24033552408218384, + "step": 2648 + }, + { + "epoch": 1.308908933646361, + "grad_norm": 1.305578072115893, + "learning_rate": 5.8593675979405795e-06, + "loss": 0.24829509854316711, + "step": 2649 + }, + { + "epoch": 1.3094031879401953, + "grad_norm": 1.3651670637998603, + "learning_rate": 5.851931714835016e-06, + "loss": 0.29011303186416626, + "step": 2650 + }, + { + "epoch": 1.3098974422340295, + "grad_norm": 1.3474001783390817, + "learning_rate": 5.8444986009205754e-06, + "loss": 0.273196280002594, + "step": 2651 + }, + { + "epoch": 1.3103916965278635, + "grad_norm": 
1.4905929158728624, + "learning_rate": 5.837068261159491e-06, + "loss": 0.28843480348587036, + "step": 2652 + }, + { + "epoch": 1.3108859508216977, + "grad_norm": 1.297768951304802, + "learning_rate": 5.829640700512159e-06, + "loss": 0.25919461250305176, + "step": 2653 + }, + { + "epoch": 1.3113802051155319, + "grad_norm": 1.4662917967499176, + "learning_rate": 5.822215923937105e-06, + "loss": 0.24588480591773987, + "step": 2654 + }, + { + "epoch": 1.311874459409366, + "grad_norm": 1.4533199098003418, + "learning_rate": 5.814793936391001e-06, + "loss": 0.26138943433761597, + "step": 2655 + }, + { + "epoch": 1.3123687137032003, + "grad_norm": 1.3892499103405112, + "learning_rate": 5.807374742828675e-06, + "loss": 0.2740943729877472, + "step": 2656 + }, + { + "epoch": 1.3128629679970345, + "grad_norm": 1.2689667055719156, + "learning_rate": 5.7999583482030605e-06, + "loss": 0.2307349294424057, + "step": 2657 + }, + { + "epoch": 1.3133572222908687, + "grad_norm": 1.4143938245126972, + "learning_rate": 5.792544757465242e-06, + "loss": 0.28424161672592163, + "step": 2658 + }, + { + "epoch": 1.313851476584703, + "grad_norm": 1.1640800807114133, + "learning_rate": 5.785133975564426e-06, + "loss": 0.2586106061935425, + "step": 2659 + }, + { + "epoch": 1.3143457308785371, + "grad_norm": 1.3677717868907802, + "learning_rate": 5.7777260074479455e-06, + "loss": 0.23268333077430725, + "step": 2660 + }, + { + "epoch": 1.314839985172371, + "grad_norm": 1.3455357811423037, + "learning_rate": 5.770320858061254e-06, + "loss": 0.22144779562950134, + "step": 2661 + }, + { + "epoch": 1.3153342394662053, + "grad_norm": 1.1539678539958322, + "learning_rate": 5.762918532347925e-06, + "loss": 0.2450334131717682, + "step": 2662 + }, + { + "epoch": 1.3158284937600395, + "grad_norm": 1.3018328480210146, + "learning_rate": 5.7555190352496375e-06, + "loss": 0.24483400583267212, + "step": 2663 + }, + { + "epoch": 1.3163227480538737, + "grad_norm": 1.484825672376601, + "learning_rate": 
5.748122371706198e-06, + "loss": 0.2590720057487488, + "step": 2664 + }, + { + "epoch": 1.316817002347708, + "grad_norm": 1.5800261617865896, + "learning_rate": 5.740728546655515e-06, + "loss": 0.27116847038269043, + "step": 2665 + }, + { + "epoch": 1.3173112566415421, + "grad_norm": 1.3133697295364004, + "learning_rate": 5.733337565033595e-06, + "loss": 0.2720273435115814, + "step": 2666 + }, + { + "epoch": 1.3178055109353761, + "grad_norm": 1.223977156924706, + "learning_rate": 5.7259494317745514e-06, + "loss": 0.22150173783302307, + "step": 2667 + }, + { + "epoch": 1.3182997652292103, + "grad_norm": 1.2771960781536442, + "learning_rate": 5.718564151810597e-06, + "loss": 0.27474984526634216, + "step": 2668 + }, + { + "epoch": 1.3187940195230445, + "grad_norm": 1.3886425317966573, + "learning_rate": 5.711181730072044e-06, + "loss": 0.2547265291213989, + "step": 2669 + }, + { + "epoch": 1.3192882738168787, + "grad_norm": 1.3822455459704068, + "learning_rate": 5.703802171487286e-06, + "loss": 0.2686036229133606, + "step": 2670 + }, + { + "epoch": 1.319782528110713, + "grad_norm": 1.2588694556349689, + "learning_rate": 5.696425480982814e-06, + "loss": 0.2276458591222763, + "step": 2671 + }, + { + "epoch": 1.3202767824045472, + "grad_norm": 1.3366284026803796, + "learning_rate": 5.6890516634832e-06, + "loss": 0.25005075335502625, + "step": 2672 + }, + { + "epoch": 1.3207710366983814, + "grad_norm": 1.4092724528348008, + "learning_rate": 5.681680723911104e-06, + "loss": 0.25919869542121887, + "step": 2673 + }, + { + "epoch": 1.3212652909922156, + "grad_norm": 1.3254224112633677, + "learning_rate": 5.6743126671872505e-06, + "loss": 0.2684757709503174, + "step": 2674 + }, + { + "epoch": 1.3217595452860498, + "grad_norm": 1.2529305606098464, + "learning_rate": 5.666947498230451e-06, + "loss": 0.2554991543292999, + "step": 2675 + }, + { + "epoch": 1.322253799579884, + "grad_norm": 1.3734571061597927, + "learning_rate": 5.6595852219575975e-06, + "loss": 0.27026665210723877, 
+ "step": 2676 + }, + { + "epoch": 1.322748053873718, + "grad_norm": 1.5029882994051502, + "learning_rate": 5.652225843283629e-06, + "loss": 0.3248092234134674, + "step": 2677 + }, + { + "epoch": 1.3232423081675522, + "grad_norm": 1.3299866785479277, + "learning_rate": 5.644869367121564e-06, + "loss": 0.2554503083229065, + "step": 2678 + }, + { + "epoch": 1.3237365624613864, + "grad_norm": 1.4099986774485116, + "learning_rate": 5.637515798382488e-06, + "loss": 0.25482693314552307, + "step": 2679 + }, + { + "epoch": 1.3242308167552206, + "grad_norm": 1.268292092612611, + "learning_rate": 5.630165141975523e-06, + "loss": 0.24664446711540222, + "step": 2680 + }, + { + "epoch": 1.3247250710490548, + "grad_norm": 1.3674712589344702, + "learning_rate": 5.622817402807879e-06, + "loss": 0.23855865001678467, + "step": 2681 + }, + { + "epoch": 1.325219325342889, + "grad_norm": 1.28659959156705, + "learning_rate": 5.615472585784796e-06, + "loss": 0.2847699820995331, + "step": 2682 + }, + { + "epoch": 1.325713579636723, + "grad_norm": 1.3902791844570088, + "learning_rate": 5.608130695809564e-06, + "loss": 0.2705647051334381, + "step": 2683 + }, + { + "epoch": 1.3262078339305572, + "grad_norm": 1.3726972299660716, + "learning_rate": 5.600791737783523e-06, + "loss": 0.30135318636894226, + "step": 2684 + }, + { + "epoch": 1.3267020882243914, + "grad_norm": 1.3006770767718296, + "learning_rate": 5.593455716606069e-06, + "loss": 0.261536180973053, + "step": 2685 + }, + { + "epoch": 1.3271963425182256, + "grad_norm": 1.2249107195075626, + "learning_rate": 5.586122637174614e-06, + "loss": 0.24006187915802002, + "step": 2686 + }, + { + "epoch": 1.3276905968120598, + "grad_norm": 1.2887498899635654, + "learning_rate": 5.578792504384618e-06, + "loss": 0.27928346395492554, + "step": 2687 + }, + { + "epoch": 1.328184851105894, + "grad_norm": 1.1715759673643904, + "learning_rate": 5.5714653231295745e-06, + "loss": 0.24134980142116547, + "step": 2688 + }, + { + "epoch": 1.3286791053997282, 
+ "grad_norm": 1.2633540397916776, + "learning_rate": 5.5641410983010055e-06, + "loss": 0.27914801239967346, + "step": 2689 + }, + { + "epoch": 1.3291733596935624, + "grad_norm": 1.6348254119913803, + "learning_rate": 5.55681983478846e-06, + "loss": 0.2735476493835449, + "step": 2690 + }, + { + "epoch": 1.3296676139873966, + "grad_norm": 1.3384777828423575, + "learning_rate": 5.549501537479511e-06, + "loss": 0.24919739365577698, + "step": 2691 + }, + { + "epoch": 1.3301618682812306, + "grad_norm": 1.430948519009228, + "learning_rate": 5.542186211259737e-06, + "loss": 0.25435787439346313, + "step": 2692 + }, + { + "epoch": 1.3306561225750648, + "grad_norm": 1.2533415908145504, + "learning_rate": 5.534873861012763e-06, + "loss": 0.2502862811088562, + "step": 2693 + }, + { + "epoch": 1.331150376868899, + "grad_norm": 1.5771700033159861, + "learning_rate": 5.527564491620195e-06, + "loss": 0.25752580165863037, + "step": 2694 + }, + { + "epoch": 1.3316446311627332, + "grad_norm": 1.332305251527839, + "learning_rate": 5.520258107961671e-06, + "loss": 0.22301846742630005, + "step": 2695 + }, + { + "epoch": 1.3321388854565674, + "grad_norm": 1.4890781870784164, + "learning_rate": 5.512954714914825e-06, + "loss": 0.24581964313983917, + "step": 2696 + }, + { + "epoch": 1.3326331397504017, + "grad_norm": 1.3113609641171107, + "learning_rate": 5.5056543173553e-06, + "loss": 0.271970272064209, + "step": 2697 + }, + { + "epoch": 1.3331273940442356, + "grad_norm": 1.2432947451070444, + "learning_rate": 5.498356920156735e-06, + "loss": 0.23041053116321564, + "step": 2698 + }, + { + "epoch": 1.3336216483380698, + "grad_norm": 1.3239879393507852, + "learning_rate": 5.491062528190775e-06, + "loss": 0.2338491678237915, + "step": 2699 + }, + { + "epoch": 1.334115902631904, + "grad_norm": 1.3971989589857847, + "learning_rate": 5.483771146327037e-06, + "loss": 0.2667239010334015, + "step": 2700 + }, + { + "epoch": 1.3346101569257383, + "grad_norm": 1.1737606299055239, + "learning_rate": 
5.4764827794331586e-06, + "loss": 0.24761441349983215, + "step": 2701 + }, + { + "epoch": 1.3351044112195725, + "grad_norm": 1.2384835240862428, + "learning_rate": 5.469197432374747e-06, + "loss": 0.24087639153003693, + "step": 2702 + }, + { + "epoch": 1.3355986655134067, + "grad_norm": 1.3287799587341789, + "learning_rate": 5.461915110015386e-06, + "loss": 0.26774898171424866, + "step": 2703 + }, + { + "epoch": 1.3360929198072409, + "grad_norm": 1.432719946516567, + "learning_rate": 5.454635817216658e-06, + "loss": 0.2820417284965515, + "step": 2704 + }, + { + "epoch": 1.336587174101075, + "grad_norm": 1.408646831955897, + "learning_rate": 5.447359558838113e-06, + "loss": 0.2891086935997009, + "step": 2705 + }, + { + "epoch": 1.3370814283949093, + "grad_norm": 1.370327694474157, + "learning_rate": 5.440086339737277e-06, + "loss": 0.24551361799240112, + "step": 2706 + }, + { + "epoch": 1.3375756826887435, + "grad_norm": 1.3889596017030068, + "learning_rate": 5.432816164769648e-06, + "loss": 0.2293522208929062, + "step": 2707 + }, + { + "epoch": 1.3380699369825775, + "grad_norm": 1.255610549812546, + "learning_rate": 5.425549038788693e-06, + "loss": 0.22325105965137482, + "step": 2708 + }, + { + "epoch": 1.3385641912764117, + "grad_norm": 1.3152207031427636, + "learning_rate": 5.4182849666458315e-06, + "loss": 0.2263861447572708, + "step": 2709 + }, + { + "epoch": 1.339058445570246, + "grad_norm": 1.2663328789435477, + "learning_rate": 5.411023953190466e-06, + "loss": 0.26902303099632263, + "step": 2710 + }, + { + "epoch": 1.33955269986408, + "grad_norm": 1.4136099878472004, + "learning_rate": 5.403766003269944e-06, + "loss": 0.26154825091362, + "step": 2711 + }, + { + "epoch": 1.3400469541579143, + "grad_norm": 1.32960722740892, + "learning_rate": 5.396511121729562e-06, + "loss": 0.2878270745277405, + "step": 2712 + }, + { + "epoch": 1.3405412084517483, + "grad_norm": 1.3136699200223048, + "learning_rate": 5.389259313412581e-06, + "loss": 0.26206687092781067, + 
"step": 2713 + }, + { + "epoch": 1.3410354627455825, + "grad_norm": 1.4998302342686003, + "learning_rate": 5.382010583160201e-06, + "loss": 0.25612518191337585, + "step": 2714 + }, + { + "epoch": 1.3415297170394167, + "grad_norm": 1.2688327982594605, + "learning_rate": 5.374764935811574e-06, + "loss": 0.25600868463516235, + "step": 2715 + }, + { + "epoch": 1.342023971333251, + "grad_norm": 1.274882827976935, + "learning_rate": 5.367522376203787e-06, + "loss": 0.24837616086006165, + "step": 2716 + }, + { + "epoch": 1.3425182256270851, + "grad_norm": 1.2814047275641038, + "learning_rate": 5.360282909171875e-06, + "loss": 0.23487885296344757, + "step": 2717 + }, + { + "epoch": 1.3430124799209193, + "grad_norm": 1.2024219184737237, + "learning_rate": 5.353046539548797e-06, + "loss": 0.22786842286586761, + "step": 2718 + }, + { + "epoch": 1.3435067342147535, + "grad_norm": 1.288373437821988, + "learning_rate": 5.3458132721654564e-06, + "loss": 0.2198137640953064, + "step": 2719 + }, + { + "epoch": 1.3440009885085877, + "grad_norm": 1.157338464361865, + "learning_rate": 5.338583111850671e-06, + "loss": 0.20056495070457458, + "step": 2720 + }, + { + "epoch": 1.344495242802422, + "grad_norm": 1.2341328448147324, + "learning_rate": 5.331356063431195e-06, + "loss": 0.21636295318603516, + "step": 2721 + }, + { + "epoch": 1.3449894970962561, + "grad_norm": 1.2390666617057948, + "learning_rate": 5.32413213173171e-06, + "loss": 0.23933230340480804, + "step": 2722 + }, + { + "epoch": 1.3454837513900901, + "grad_norm": 1.3024836233276083, + "learning_rate": 5.316911321574799e-06, + "loss": 0.2402106523513794, + "step": 2723 + }, + { + "epoch": 1.3459780056839243, + "grad_norm": 1.252933113923405, + "learning_rate": 5.309693637780979e-06, + "loss": 0.22524669766426086, + "step": 2724 + }, + { + "epoch": 1.3464722599777585, + "grad_norm": 1.3140972939485838, + "learning_rate": 5.302479085168668e-06, + "loss": 0.25381600856781006, + "step": 2725 + }, + { + "epoch": 
1.3469665142715928, + "grad_norm": 1.2857997911307526, + "learning_rate": 5.295267668554202e-06, + "loss": 0.2614738643169403, + "step": 2726 + }, + { + "epoch": 1.347460768565427, + "grad_norm": 8.575818718402259, + "learning_rate": 5.288059392751817e-06, + "loss": 0.2701472043991089, + "step": 2727 + }, + { + "epoch": 1.3479550228592612, + "grad_norm": 1.378318405059408, + "learning_rate": 5.280854262573661e-06, + "loss": 0.2788996696472168, + "step": 2728 + }, + { + "epoch": 1.3484492771530951, + "grad_norm": 1.2759693341337726, + "learning_rate": 5.273652282829764e-06, + "loss": 0.2419927418231964, + "step": 2729 + }, + { + "epoch": 1.3489435314469294, + "grad_norm": 1.4943656047554885, + "learning_rate": 5.266453458328071e-06, + "loss": 0.26454097032546997, + "step": 2730 + }, + { + "epoch": 1.3494377857407636, + "grad_norm": 1.3109211241308218, + "learning_rate": 5.259257793874421e-06, + "loss": 0.24090510606765747, + "step": 2731 + }, + { + "epoch": 1.3499320400345978, + "grad_norm": 1.3390086912520884, + "learning_rate": 5.252065294272528e-06, + "loss": 0.27343428134918213, + "step": 2732 + }, + { + "epoch": 1.350426294328432, + "grad_norm": 1.3272957509132868, + "learning_rate": 5.244875964324005e-06, + "loss": 0.2623448967933655, + "step": 2733 + }, + { + "epoch": 1.3509205486222662, + "grad_norm": 1.2273005978142049, + "learning_rate": 5.237689808828346e-06, + "loss": 0.22721052169799805, + "step": 2734 + }, + { + "epoch": 1.3514148029161004, + "grad_norm": 1.4111267721919942, + "learning_rate": 5.230506832582924e-06, + "loss": 0.26385387778282166, + "step": 2735 + }, + { + "epoch": 1.3519090572099346, + "grad_norm": 1.4309565613654673, + "learning_rate": 5.223327040382995e-06, + "loss": 0.2679533064365387, + "step": 2736 + }, + { + "epoch": 1.3524033115037688, + "grad_norm": 1.285385576934023, + "learning_rate": 5.2161504370216855e-06, + "loss": 0.25042447447776794, + "step": 2737 + }, + { + "epoch": 1.3528975657976028, + "grad_norm": 
1.3420398780717075, + "learning_rate": 5.2089770272899845e-06, + "loss": 0.22735297679901123, + "step": 2738 + }, + { + "epoch": 1.353391820091437, + "grad_norm": 1.2715261749804811, + "learning_rate": 5.201806815976772e-06, + "loss": 0.25517284870147705, + "step": 2739 + }, + { + "epoch": 1.3538860743852712, + "grad_norm": 1.4834789867138143, + "learning_rate": 5.194639807868767e-06, + "loss": 0.2942652702331543, + "step": 2740 + }, + { + "epoch": 1.3543803286791054, + "grad_norm": 1.2535180106339032, + "learning_rate": 5.187476007750567e-06, + "loss": 0.2605661153793335, + "step": 2741 + }, + { + "epoch": 1.3548745829729396, + "grad_norm": 1.34702814682356, + "learning_rate": 5.1803154204046215e-06, + "loss": 0.22976648807525635, + "step": 2742 + }, + { + "epoch": 1.3553688372667738, + "grad_norm": 1.2786328684416228, + "learning_rate": 5.173158050611236e-06, + "loss": 0.24301470816135406, + "step": 2743 + }, + { + "epoch": 1.3558630915606078, + "grad_norm": 1.3509518199555386, + "learning_rate": 5.166003903148568e-06, + "loss": 0.2714199125766754, + "step": 2744 + }, + { + "epoch": 1.356357345854442, + "grad_norm": 1.4130809131188478, + "learning_rate": 5.15885298279263e-06, + "loss": 0.27004045248031616, + "step": 2745 + }, + { + "epoch": 1.3568516001482762, + "grad_norm": 1.1866112739948385, + "learning_rate": 5.151705294317262e-06, + "loss": 0.2062053680419922, + "step": 2746 + }, + { + "epoch": 1.3573458544421104, + "grad_norm": 1.3476275860643891, + "learning_rate": 5.144560842494168e-06, + "loss": 0.2589803636074066, + "step": 2747 + }, + { + "epoch": 1.3578401087359446, + "grad_norm": 1.4207662826517113, + "learning_rate": 5.137419632092886e-06, + "loss": 0.26469242572784424, + "step": 2748 + }, + { + "epoch": 1.3583343630297788, + "grad_norm": 1.217607994018294, + "learning_rate": 5.130281667880774e-06, + "loss": 0.26241326332092285, + "step": 2749 + }, + { + "epoch": 1.358828617323613, + "grad_norm": 1.375829317891462, + "learning_rate": 
5.123146954623038e-06, + "loss": 0.2674810290336609, + "step": 2750 + }, + { + "epoch": 1.3593228716174472, + "grad_norm": 1.3872924823998294, + "learning_rate": 5.116015497082719e-06, + "loss": 0.23186063766479492, + "step": 2751 + }, + { + "epoch": 1.3598171259112815, + "grad_norm": 1.3207469475464653, + "learning_rate": 5.108887300020669e-06, + "loss": 0.2794165313243866, + "step": 2752 + }, + { + "epoch": 1.3603113802051157, + "grad_norm": 1.2682065300683938, + "learning_rate": 5.1017623681955705e-06, + "loss": 0.25263023376464844, + "step": 2753 + }, + { + "epoch": 1.3608056344989496, + "grad_norm": 1.385223404499901, + "learning_rate": 5.0946407063639315e-06, + "loss": 0.2503500282764435, + "step": 2754 + }, + { + "epoch": 1.3612998887927839, + "grad_norm": 1.1490078969357793, + "learning_rate": 5.087522319280061e-06, + "loss": 0.21871569752693176, + "step": 2755 + }, + { + "epoch": 1.361794143086618, + "grad_norm": 1.3919853358310244, + "learning_rate": 5.080407211696103e-06, + "loss": 0.2790142893791199, + "step": 2756 + }, + { + "epoch": 1.3622883973804523, + "grad_norm": 1.3837841689522787, + "learning_rate": 5.073295388362003e-06, + "loss": 0.27197304368019104, + "step": 2757 + }, + { + "epoch": 1.3627826516742865, + "grad_norm": 1.3248855835987599, + "learning_rate": 5.066186854025502e-06, + "loss": 0.2402152568101883, + "step": 2758 + }, + { + "epoch": 1.3632769059681207, + "grad_norm": 1.3193984824612894, + "learning_rate": 5.059081613432162e-06, + "loss": 0.24418887495994568, + "step": 2759 + }, + { + "epoch": 1.3637711602619547, + "grad_norm": 1.1840901033348532, + "learning_rate": 5.05197967132534e-06, + "loss": 0.2239491045475006, + "step": 2760 + }, + { + "epoch": 1.3642654145557889, + "grad_norm": 1.3401183348354848, + "learning_rate": 5.044881032446192e-06, + "loss": 0.25177091360092163, + "step": 2761 + }, + { + "epoch": 1.364759668849623, + "grad_norm": 1.2524679914953787, + "learning_rate": 5.0377857015336655e-06, + "loss": 
0.25462138652801514, + "step": 2762 + }, + { + "epoch": 1.3652539231434573, + "grad_norm": 1.154660335850044, + "learning_rate": 5.0306936833245034e-06, + "loss": 0.21030092239379883, + "step": 2763 + }, + { + "epoch": 1.3657481774372915, + "grad_norm": 1.2778480955324765, + "learning_rate": 5.0236049825532355e-06, + "loss": 0.24033348262310028, + "step": 2764 + }, + { + "epoch": 1.3662424317311257, + "grad_norm": 1.2874693424331807, + "learning_rate": 5.016519603952177e-06, + "loss": 0.20803815126419067, + "step": 2765 + }, + { + "epoch": 1.36673668602496, + "grad_norm": 1.3360777408248645, + "learning_rate": 5.00943755225143e-06, + "loss": 0.21589599549770355, + "step": 2766 + }, + { + "epoch": 1.367230940318794, + "grad_norm": 1.3112690340132882, + "learning_rate": 5.00235883217886e-06, + "loss": 0.2690975069999695, + "step": 2767 + }, + { + "epoch": 1.3677251946126283, + "grad_norm": 1.395793399890879, + "learning_rate": 4.995283448460131e-06, + "loss": 0.2368423044681549, + "step": 2768 + }, + { + "epoch": 1.3682194489064623, + "grad_norm": 1.428306560095472, + "learning_rate": 4.988211405818661e-06, + "loss": 0.2801262140274048, + "step": 2769 + }, + { + "epoch": 1.3687137032002965, + "grad_norm": 1.4209027545437471, + "learning_rate": 4.981142708975647e-06, + "loss": 0.2777586877346039, + "step": 2770 + }, + { + "epoch": 1.3692079574941307, + "grad_norm": 1.1921679323806382, + "learning_rate": 4.97407736265005e-06, + "loss": 0.2400980144739151, + "step": 2771 + }, + { + "epoch": 1.369702211787965, + "grad_norm": 1.233538906022963, + "learning_rate": 4.967015371558592e-06, + "loss": 0.2513861358165741, + "step": 2772 + }, + { + "epoch": 1.3701964660817991, + "grad_norm": 1.2944813845771217, + "learning_rate": 4.959956740415761e-06, + "loss": 0.2785816490650177, + "step": 2773 + }, + { + "epoch": 1.3706907203756333, + "grad_norm": 1.456856079389265, + "learning_rate": 4.9529014739338e-06, + "loss": 0.29092347621917725, + "step": 2774 + }, + { + "epoch": 
1.3711849746694673, + "grad_norm": 1.3133832748237033, + "learning_rate": 4.945849576822693e-06, + "loss": 0.27067384123802185, + "step": 2775 + }, + { + "epoch": 1.3716792289633015, + "grad_norm": 1.3000530351478699, + "learning_rate": 4.938801053790199e-06, + "loss": 0.21500205993652344, + "step": 2776 + }, + { + "epoch": 1.3721734832571357, + "grad_norm": 1.2838621226635265, + "learning_rate": 4.931755909541808e-06, + "loss": 0.2422936111688614, + "step": 2777 + }, + { + "epoch": 1.37266773755097, + "grad_norm": 1.3694112071584477, + "learning_rate": 4.9247141487807515e-06, + "loss": 0.2760060727596283, + "step": 2778 + }, + { + "epoch": 1.3731619918448041, + "grad_norm": 1.39746625445185, + "learning_rate": 4.917675776208013e-06, + "loss": 0.22626326978206635, + "step": 2779 + }, + { + "epoch": 1.3736562461386383, + "grad_norm": 1.34096746485375, + "learning_rate": 4.910640796522308e-06, + "loss": 0.23023411631584167, + "step": 2780 + }, + { + "epoch": 1.3741505004324726, + "grad_norm": 1.29137003736815, + "learning_rate": 4.903609214420088e-06, + "loss": 0.22157053649425507, + "step": 2781 + }, + { + "epoch": 1.3746447547263068, + "grad_norm": 1.1801851543310786, + "learning_rate": 4.89658103459554e-06, + "loss": 0.24125584959983826, + "step": 2782 + }, + { + "epoch": 1.375139009020141, + "grad_norm": 1.3517508821088553, + "learning_rate": 4.889556261740578e-06, + "loss": 0.26294079422950745, + "step": 2783 + }, + { + "epoch": 1.3756332633139752, + "grad_norm": 1.2726719724151299, + "learning_rate": 4.882534900544829e-06, + "loss": 0.25327497720718384, + "step": 2784 + }, + { + "epoch": 1.3761275176078092, + "grad_norm": 1.2868199846308948, + "learning_rate": 4.875516955695663e-06, + "loss": 0.2716723084449768, + "step": 2785 + }, + { + "epoch": 1.3766217719016434, + "grad_norm": 1.4619117882899046, + "learning_rate": 4.8685024318781615e-06, + "loss": 0.2889532446861267, + "step": 2786 + }, + { + "epoch": 1.3771160261954776, + "grad_norm": 1.2622088454697893, 
+ "learning_rate": 4.861491333775114e-06, + "loss": 0.23743030428886414, + "step": 2787 + }, + { + "epoch": 1.3776102804893118, + "grad_norm": 1.2912517641324606, + "learning_rate": 4.8544836660670305e-06, + "loss": 0.27180567383766174, + "step": 2788 + }, + { + "epoch": 1.378104534783146, + "grad_norm": 1.3376004646586275, + "learning_rate": 4.847479433432131e-06, + "loss": 0.2549944221973419, + "step": 2789 + }, + { + "epoch": 1.37859878907698, + "grad_norm": 1.189305404121555, + "learning_rate": 4.8404786405463414e-06, + "loss": 0.24112319946289062, + "step": 2790 + }, + { + "epoch": 1.3790930433708142, + "grad_norm": 1.1833978049698726, + "learning_rate": 4.833481292083291e-06, + "loss": 0.22865869104862213, + "step": 2791 + }, + { + "epoch": 1.3795872976646484, + "grad_norm": 1.268697923498799, + "learning_rate": 4.82648739271431e-06, + "loss": 0.24851003289222717, + "step": 2792 + }, + { + "epoch": 1.3800815519584826, + "grad_norm": 1.2931223721765053, + "learning_rate": 4.819496947108424e-06, + "loss": 0.251456081867218, + "step": 2793 + }, + { + "epoch": 1.3805758062523168, + "grad_norm": 1.4758961733623657, + "learning_rate": 4.81250995993236e-06, + "loss": 0.31711041927337646, + "step": 2794 + }, + { + "epoch": 1.381070060546151, + "grad_norm": 1.3291779254725478, + "learning_rate": 4.805526435850523e-06, + "loss": 0.2204340100288391, + "step": 2795 + }, + { + "epoch": 1.3815643148399852, + "grad_norm": 1.2784619373678463, + "learning_rate": 4.798546379525013e-06, + "loss": 0.26289406418800354, + "step": 2796 + }, + { + "epoch": 1.3820585691338194, + "grad_norm": 1.28320111492484, + "learning_rate": 4.7915697956156284e-06, + "loss": 0.24830611050128937, + "step": 2797 + }, + { + "epoch": 1.3825528234276536, + "grad_norm": 1.2879657785107324, + "learning_rate": 4.784596688779825e-06, + "loss": 0.24792183935642242, + "step": 2798 + }, + { + "epoch": 1.3830470777214878, + "grad_norm": 1.2696074389245717, + "learning_rate": 4.777627063672753e-06, + "loss": 
0.2689560651779175, + "step": 2799 + }, + { + "epoch": 1.3835413320153218, + "grad_norm": 1.3225545388421776, + "learning_rate": 4.770660924947238e-06, + "loss": 0.24323254823684692, + "step": 2800 + }, + { + "epoch": 1.384035586309156, + "grad_norm": 1.4076671335254063, + "learning_rate": 4.7636982772537645e-06, + "loss": 0.24404528737068176, + "step": 2801 + }, + { + "epoch": 1.3845298406029902, + "grad_norm": 1.203765816908177, + "learning_rate": 4.7567391252405075e-06, + "loss": 0.23512448370456696, + "step": 2802 + }, + { + "epoch": 1.3850240948968244, + "grad_norm": 1.5018331188451308, + "learning_rate": 4.749783473553297e-06, + "loss": 0.26446110010147095, + "step": 2803 + }, + { + "epoch": 1.3855183491906586, + "grad_norm": 1.408580468005289, + "learning_rate": 4.742831326835618e-06, + "loss": 0.24630968272686005, + "step": 2804 + }, + { + "epoch": 1.3860126034844928, + "grad_norm": 1.3358261514200123, + "learning_rate": 4.735882689728628e-06, + "loss": 0.253492146730423, + "step": 2805 + }, + { + "epoch": 1.3865068577783268, + "grad_norm": 1.3501776737603972, + "learning_rate": 4.7289375668711444e-06, + "loss": 0.271090567111969, + "step": 2806 + }, + { + "epoch": 1.387001112072161, + "grad_norm": 1.278147407656648, + "learning_rate": 4.721995962899625e-06, + "loss": 0.24045832455158234, + "step": 2807 + }, + { + "epoch": 1.3874953663659952, + "grad_norm": 1.3482420589650876, + "learning_rate": 4.715057882448187e-06, + "loss": 0.2525935471057892, + "step": 2808 + }, + { + "epoch": 1.3879896206598294, + "grad_norm": 1.6416013674407632, + "learning_rate": 4.708123330148593e-06, + "loss": 0.30852392315864563, + "step": 2809 + }, + { + "epoch": 1.3884838749536637, + "grad_norm": 1.4379358472073636, + "learning_rate": 4.701192310630253e-06, + "loss": 0.2770250737667084, + "step": 2810 + }, + { + "epoch": 1.3889781292474979, + "grad_norm": 1.3872314722590495, + "learning_rate": 4.6942648285202154e-06, + "loss": 0.29135680198669434, + "step": 2811 + }, + { + 
"epoch": 1.389472383541332, + "grad_norm": 1.3561535153102244, + "learning_rate": 4.687340888443171e-06, + "loss": 0.26933860778808594, + "step": 2812 + }, + { + "epoch": 1.3899666378351663, + "grad_norm": 1.3589820356083573, + "learning_rate": 4.680420495021436e-06, + "loss": 0.26089105010032654, + "step": 2813 + }, + { + "epoch": 1.3904608921290005, + "grad_norm": 1.446680212777315, + "learning_rate": 4.673503652874977e-06, + "loss": 0.26031410694122314, + "step": 2814 + }, + { + "epoch": 1.3909551464228345, + "grad_norm": 1.4223445911905375, + "learning_rate": 4.6665903666213685e-06, + "loss": 0.2887076139450073, + "step": 2815 + }, + { + "epoch": 1.3914494007166687, + "grad_norm": 1.4125652827001185, + "learning_rate": 4.6596806408758275e-06, + "loss": 0.2360706925392151, + "step": 2816 + }, + { + "epoch": 1.3919436550105029, + "grad_norm": 1.2857689419175287, + "learning_rate": 4.652774480251186e-06, + "loss": 0.22275522351264954, + "step": 2817 + }, + { + "epoch": 1.392437909304337, + "grad_norm": 1.4433288432295395, + "learning_rate": 4.645871889357899e-06, + "loss": 0.2425977736711502, + "step": 2818 + }, + { + "epoch": 1.3929321635981713, + "grad_norm": 1.3257241152583827, + "learning_rate": 4.638972872804038e-06, + "loss": 0.25219830870628357, + "step": 2819 + }, + { + "epoch": 1.3934264178920055, + "grad_norm": 1.3749035761313395, + "learning_rate": 4.6320774351952916e-06, + "loss": 0.28060346841812134, + "step": 2820 + }, + { + "epoch": 1.3939206721858395, + "grad_norm": 1.2003147708990263, + "learning_rate": 4.625185581134942e-06, + "loss": 0.2395240217447281, + "step": 2821 + }, + { + "epoch": 1.3944149264796737, + "grad_norm": 1.1704641579429333, + "learning_rate": 4.618297315223906e-06, + "loss": 0.23622646927833557, + "step": 2822 + }, + { + "epoch": 1.394909180773508, + "grad_norm": 1.2829625624138312, + "learning_rate": 4.611412642060692e-06, + "loss": 0.2189474105834961, + "step": 2823 + }, + { + "epoch": 1.395403435067342, + "grad_norm": 
1.433264639271618, + "learning_rate": 4.6045315662414e-06, + "loss": 0.266002357006073, + "step": 2824 + }, + { + "epoch": 1.3958976893611763, + "grad_norm": 1.3252437693414834, + "learning_rate": 4.5976540923597425e-06, + "loss": 0.2402176856994629, + "step": 2825 + }, + { + "epoch": 1.3963919436550105, + "grad_norm": 1.359969321526994, + "learning_rate": 4.5907802250070235e-06, + "loss": 0.2493474781513214, + "step": 2826 + }, + { + "epoch": 1.3968861979488447, + "grad_norm": 1.41117190363675, + "learning_rate": 4.583909968772137e-06, + "loss": 0.25716543197631836, + "step": 2827 + }, + { + "epoch": 1.397380452242679, + "grad_norm": 1.2726969842984424, + "learning_rate": 4.57704332824157e-06, + "loss": 0.29470473527908325, + "step": 2828 + }, + { + "epoch": 1.3978747065365131, + "grad_norm": 1.3349562969336177, + "learning_rate": 4.570180307999394e-06, + "loss": 0.28095656633377075, + "step": 2829 + }, + { + "epoch": 1.3983689608303473, + "grad_norm": 1.3296802970374444, + "learning_rate": 4.563320912627256e-06, + "loss": 0.2351825088262558, + "step": 2830 + }, + { + "epoch": 1.3988632151241813, + "grad_norm": 1.378245480597285, + "learning_rate": 4.556465146704399e-06, + "loss": 0.25859856605529785, + "step": 2831 + }, + { + "epoch": 1.3993574694180155, + "grad_norm": 1.3122509634402246, + "learning_rate": 4.549613014807637e-06, + "loss": 0.2503181993961334, + "step": 2832 + }, + { + "epoch": 1.3998517237118497, + "grad_norm": 1.4164889794081637, + "learning_rate": 4.542764521511345e-06, + "loss": 0.26368820667266846, + "step": 2833 + }, + { + "epoch": 1.400345978005684, + "grad_norm": 1.2584462742908673, + "learning_rate": 4.535919671387483e-06, + "loss": 0.24077676236629486, + "step": 2834 + }, + { + "epoch": 1.4008402322995182, + "grad_norm": 1.3906309875331755, + "learning_rate": 4.529078469005577e-06, + "loss": 0.27042093873023987, + "step": 2835 + }, + { + "epoch": 1.4013344865933524, + "grad_norm": 1.3047899471845867, + "learning_rate": 
4.5222409189327155e-06, + "loss": 0.2731306552886963, + "step": 2836 + }, + { + "epoch": 1.4018287408871863, + "grad_norm": 1.293016022457822, + "learning_rate": 4.515407025733548e-06, + "loss": 0.2925037741661072, + "step": 2837 + }, + { + "epoch": 1.4023229951810205, + "grad_norm": 1.3019226114538747, + "learning_rate": 4.508576793970285e-06, + "loss": 0.2927025556564331, + "step": 2838 + }, + { + "epoch": 1.4028172494748548, + "grad_norm": 1.2637397509173496, + "learning_rate": 4.5017502282026926e-06, + "loss": 0.26285338401794434, + "step": 2839 + }, + { + "epoch": 1.403311503768689, + "grad_norm": 1.3147900807622677, + "learning_rate": 4.49492733298809e-06, + "loss": 0.22698873281478882, + "step": 2840 + }, + { + "epoch": 1.4038057580625232, + "grad_norm": 1.3171706155487821, + "learning_rate": 4.488108112881339e-06, + "loss": 0.24116170406341553, + "step": 2841 + }, + { + "epoch": 1.4043000123563574, + "grad_norm": 1.57472275672956, + "learning_rate": 4.481292572434852e-06, + "loss": 0.3211704194545746, + "step": 2842 + }, + { + "epoch": 1.4047942666501916, + "grad_norm": 1.3631722904804857, + "learning_rate": 4.474480716198598e-06, + "loss": 0.26634523272514343, + "step": 2843 + }, + { + "epoch": 1.4052885209440258, + "grad_norm": 1.2801660794508798, + "learning_rate": 4.467672548720066e-06, + "loss": 0.24751242995262146, + "step": 2844 + }, + { + "epoch": 1.40578277523786, + "grad_norm": 1.2023997182117507, + "learning_rate": 4.4608680745442915e-06, + "loss": 0.22031354904174805, + "step": 2845 + }, + { + "epoch": 1.406277029531694, + "grad_norm": 1.4549549871552898, + "learning_rate": 4.454067298213847e-06, + "loss": 0.2474634051322937, + "step": 2846 + }, + { + "epoch": 1.4067712838255282, + "grad_norm": 1.2925543429398942, + "learning_rate": 4.4472702242688315e-06, + "loss": 0.2494845986366272, + "step": 2847 + }, + { + "epoch": 1.4072655381193624, + "grad_norm": 1.246615378915442, + "learning_rate": 4.440476857246876e-06, + "loss": 0.23150494694709778, 
+ "step": 2848 + }, + { + "epoch": 1.4077597924131966, + "grad_norm": 1.3473585855048795, + "learning_rate": 4.433687201683138e-06, + "loss": 0.2093413770198822, + "step": 2849 + }, + { + "epoch": 1.4082540467070308, + "grad_norm": 1.4247715723132508, + "learning_rate": 4.426901262110287e-06, + "loss": 0.26741865277290344, + "step": 2850 + }, + { + "epoch": 1.408748301000865, + "grad_norm": 1.3965732526570211, + "learning_rate": 4.420119043058521e-06, + "loss": 0.2599044740200043, + "step": 2851 + }, + { + "epoch": 1.409242555294699, + "grad_norm": 1.37695062225065, + "learning_rate": 4.413340549055562e-06, + "loss": 0.26934683322906494, + "step": 2852 + }, + { + "epoch": 1.4097368095885332, + "grad_norm": 1.247550824996485, + "learning_rate": 4.4065657846266255e-06, + "loss": 0.2609720528125763, + "step": 2853 + }, + { + "epoch": 1.4102310638823674, + "grad_norm": 1.3034094501092508, + "learning_rate": 4.39979475429445e-06, + "loss": 0.23431813716888428, + "step": 2854 + }, + { + "epoch": 1.4107253181762016, + "grad_norm": 1.5127417165274348, + "learning_rate": 4.39302746257928e-06, + "loss": 0.2791878581047058, + "step": 2855 + }, + { + "epoch": 1.4112195724700358, + "grad_norm": 1.445393105302077, + "learning_rate": 4.386263913998862e-06, + "loss": 0.30482247471809387, + "step": 2856 + }, + { + "epoch": 1.41171382676387, + "grad_norm": 1.517774336378155, + "learning_rate": 4.379504113068445e-06, + "loss": 0.24561305344104767, + "step": 2857 + }, + { + "epoch": 1.4122080810577042, + "grad_norm": 1.2686201180133903, + "learning_rate": 4.372748064300777e-06, + "loss": 0.23973286151885986, + "step": 2858 + }, + { + "epoch": 1.4127023353515384, + "grad_norm": 1.2884315615066577, + "learning_rate": 4.365995772206092e-06, + "loss": 0.26788556575775146, + "step": 2859 + }, + { + "epoch": 1.4131965896453726, + "grad_norm": 1.2479985472864645, + "learning_rate": 4.359247241292136e-06, + "loss": 0.22432288527488708, + "step": 2860 + }, + { + "epoch": 1.4136908439392069, + 
"grad_norm": 1.4071442664764462, + "learning_rate": 4.352502476064121e-06, + "loss": 0.282687783241272, + "step": 2861 + }, + { + "epoch": 1.4141850982330408, + "grad_norm": 1.350175603929749, + "learning_rate": 4.345761481024761e-06, + "loss": 0.2516692578792572, + "step": 2862 + }, + { + "epoch": 1.414679352526875, + "grad_norm": 1.3813903906983658, + "learning_rate": 4.3390242606742465e-06, + "loss": 0.2473583221435547, + "step": 2863 + }, + { + "epoch": 1.4151736068207093, + "grad_norm": 1.365125849897862, + "learning_rate": 4.33229081951025e-06, + "loss": 0.24372908473014832, + "step": 2864 + }, + { + "epoch": 1.4156678611145435, + "grad_norm": 1.935117633937839, + "learning_rate": 4.325561162027922e-06, + "loss": 0.2877897024154663, + "step": 2865 + }, + { + "epoch": 1.4161621154083777, + "grad_norm": 1.3789670558806315, + "learning_rate": 4.318835292719886e-06, + "loss": 0.2554720342159271, + "step": 2866 + }, + { + "epoch": 1.4166563697022119, + "grad_norm": 1.400243578908533, + "learning_rate": 4.312113216076228e-06, + "loss": 0.26695260405540466, + "step": 2867 + }, + { + "epoch": 1.4171506239960459, + "grad_norm": 1.310264039945657, + "learning_rate": 4.305394936584522e-06, + "loss": 0.26983851194381714, + "step": 2868 + }, + { + "epoch": 1.41764487828988, + "grad_norm": 1.4664847959785403, + "learning_rate": 4.298680458729793e-06, + "loss": 0.303170382976532, + "step": 2869 + }, + { + "epoch": 1.4181391325837143, + "grad_norm": 1.2870012899484584, + "learning_rate": 4.2919697869945234e-06, + "loss": 0.23217584192752838, + "step": 2870 + }, + { + "epoch": 1.4186333868775485, + "grad_norm": 1.3723703910904035, + "learning_rate": 4.285262925858663e-06, + "loss": 0.2895517349243164, + "step": 2871 + }, + { + "epoch": 1.4191276411713827, + "grad_norm": 1.3083324921698822, + "learning_rate": 4.278559879799628e-06, + "loss": 0.24025630950927734, + "step": 2872 + }, + { + "epoch": 1.4196218954652169, + "grad_norm": 1.2827271091784578, + "learning_rate": 
4.271860653292263e-06, + "loss": 0.22810839116573334, + "step": 2873 + }, + { + "epoch": 1.420116149759051, + "grad_norm": 1.3806208017840322, + "learning_rate": 4.26516525080888e-06, + "loss": 0.266724169254303, + "step": 2874 + }, + { + "epoch": 1.4206104040528853, + "grad_norm": 1.225057219675358, + "learning_rate": 4.25847367681924e-06, + "loss": 0.22618745267391205, + "step": 2875 + }, + { + "epoch": 1.4211046583467195, + "grad_norm": 1.2369737958102245, + "learning_rate": 4.251785935790529e-06, + "loss": 0.2239789217710495, + "step": 2876 + }, + { + "epoch": 1.4215989126405535, + "grad_norm": 1.4266723106614325, + "learning_rate": 4.245102032187399e-06, + "loss": 0.21519358456134796, + "step": 2877 + }, + { + "epoch": 1.4220931669343877, + "grad_norm": 1.3543349519259755, + "learning_rate": 4.2384219704719284e-06, + "loss": 0.31226712465286255, + "step": 2878 + }, + { + "epoch": 1.422587421228222, + "grad_norm": 1.56763311196269, + "learning_rate": 4.231745755103625e-06, + "loss": 0.26814836263656616, + "step": 2879 + }, + { + "epoch": 1.423081675522056, + "grad_norm": 1.340943129837897, + "learning_rate": 4.225073390539436e-06, + "loss": 0.2369621843099594, + "step": 2880 + }, + { + "epoch": 1.4235759298158903, + "grad_norm": 1.4174455321042607, + "learning_rate": 4.218404881233737e-06, + "loss": 0.2556746304035187, + "step": 2881 + }, + { + "epoch": 1.4240701841097245, + "grad_norm": 1.4008574237374047, + "learning_rate": 4.2117402316383314e-06, + "loss": 0.25875598192214966, + "step": 2882 + }, + { + "epoch": 1.4245644384035585, + "grad_norm": 1.3837412182941131, + "learning_rate": 4.205079446202443e-06, + "loss": 0.26839762926101685, + "step": 2883 + }, + { + "epoch": 1.4250586926973927, + "grad_norm": 1.3404796422391116, + "learning_rate": 4.198422529372717e-06, + "loss": 0.2764383554458618, + "step": 2884 + }, + { + "epoch": 1.425552946991227, + "grad_norm": 1.6233600341280843, + "learning_rate": 4.191769485593216e-06, + "loss": 0.24517112970352173, + 
"step": 2885 + }, + { + "epoch": 1.4260472012850611, + "grad_norm": 1.2960278491651354, + "learning_rate": 4.18512031930542e-06, + "loss": 0.21880990266799927, + "step": 2886 + }, + { + "epoch": 1.4265414555788953, + "grad_norm": 1.25547495232964, + "learning_rate": 4.178475034948212e-06, + "loss": 0.24671246111392975, + "step": 2887 + }, + { + "epoch": 1.4270357098727295, + "grad_norm": 1.3321806455697769, + "learning_rate": 4.171833636957886e-06, + "loss": 0.25473371148109436, + "step": 2888 + }, + { + "epoch": 1.4275299641665637, + "grad_norm": 1.2832708163920512, + "learning_rate": 4.1651961297681574e-06, + "loss": 0.2675618529319763, + "step": 2889 + }, + { + "epoch": 1.428024218460398, + "grad_norm": 1.361777795281808, + "learning_rate": 4.15856251781012e-06, + "loss": 0.24357986450195312, + "step": 2890 + }, + { + "epoch": 1.4285184727542322, + "grad_norm": 1.360475333723739, + "learning_rate": 4.1519328055122825e-06, + "loss": 0.2668409049510956, + "step": 2891 + }, + { + "epoch": 1.4290127270480664, + "grad_norm": 1.237397304360782, + "learning_rate": 4.145306997300543e-06, + "loss": 0.24507637321949005, + "step": 2892 + }, + { + "epoch": 1.4295069813419004, + "grad_norm": 1.366253286129835, + "learning_rate": 4.1386850975982e-06, + "loss": 0.2791709899902344, + "step": 2893 + }, + { + "epoch": 1.4300012356357346, + "grad_norm": 1.2339989570889298, + "learning_rate": 4.132067110825939e-06, + "loss": 0.24982133507728577, + "step": 2894 + }, + { + "epoch": 1.4304954899295688, + "grad_norm": 1.4357848897595227, + "learning_rate": 4.125453041401835e-06, + "loss": 0.2814679741859436, + "step": 2895 + }, + { + "epoch": 1.430989744223403, + "grad_norm": 1.2447298736764703, + "learning_rate": 4.118842893741336e-06, + "loss": 0.22699782252311707, + "step": 2896 + }, + { + "epoch": 1.4314839985172372, + "grad_norm": 1.9366220135779266, + "learning_rate": 4.112236672257294e-06, + "loss": 0.23297230899333954, + "step": 2897 + }, + { + "epoch": 1.4319782528110712, + 
"grad_norm": 1.4169021772429402, + "learning_rate": 4.1056343813599265e-06, + "loss": 0.26085159182548523, + "step": 2898 + }, + { + "epoch": 1.4324725071049054, + "grad_norm": 1.2947699028454482, + "learning_rate": 4.0990360254568216e-06, + "loss": 0.27813559770584106, + "step": 2899 + }, + { + "epoch": 1.4329667613987396, + "grad_norm": 1.4648322974961994, + "learning_rate": 4.092441608952953e-06, + "loss": 0.2821611762046814, + "step": 2900 + }, + { + "epoch": 1.4334610156925738, + "grad_norm": 1.4262304528738896, + "learning_rate": 4.085851136250657e-06, + "loss": 0.25223150849342346, + "step": 2901 + }, + { + "epoch": 1.433955269986408, + "grad_norm": 1.2236760469459784, + "learning_rate": 4.079264611749639e-06, + "loss": 0.225361630320549, + "step": 2902 + }, + { + "epoch": 1.4344495242802422, + "grad_norm": 1.2980114377261416, + "learning_rate": 4.07268203984697e-06, + "loss": 0.2564583420753479, + "step": 2903 + }, + { + "epoch": 1.4349437785740764, + "grad_norm": 1.618238680371033, + "learning_rate": 4.066103424937083e-06, + "loss": 0.2433827817440033, + "step": 2904 + }, + { + "epoch": 1.4354380328679106, + "grad_norm": 1.326779755851318, + "learning_rate": 4.059528771411758e-06, + "loss": 0.26073208451271057, + "step": 2905 + }, + { + "epoch": 1.4359322871617448, + "grad_norm": 1.381783420476221, + "learning_rate": 4.052958083660153e-06, + "loss": 0.2937609553337097, + "step": 2906 + }, + { + "epoch": 1.436426541455579, + "grad_norm": 1.2248682484343931, + "learning_rate": 4.046391366068756e-06, + "loss": 0.22026552259922028, + "step": 2907 + }, + { + "epoch": 1.436920795749413, + "grad_norm": 1.2471555303405935, + "learning_rate": 4.039828623021415e-06, + "loss": 0.21137471497058868, + "step": 2908 + }, + { + "epoch": 1.4374150500432472, + "grad_norm": 1.316365476590171, + "learning_rate": 4.033269858899324e-06, + "loss": 0.23597699403762817, + "step": 2909 + }, + { + "epoch": 1.4379093043370814, + "grad_norm": 1.3166979356724768, + "learning_rate": 
4.026715078081023e-06, + "loss": 0.2667025923728943, + "step": 2910 + }, + { + "epoch": 1.4384035586309156, + "grad_norm": 1.2942746954451143, + "learning_rate": 4.020164284942387e-06, + "loss": 0.2789616584777832, + "step": 2911 + }, + { + "epoch": 1.4388978129247498, + "grad_norm": 1.2105601579452838, + "learning_rate": 4.013617483856637e-06, + "loss": 0.23176617920398712, + "step": 2912 + }, + { + "epoch": 1.439392067218584, + "grad_norm": 1.3989428986083243, + "learning_rate": 4.007074679194313e-06, + "loss": 0.2814248204231262, + "step": 2913 + }, + { + "epoch": 1.439886321512418, + "grad_norm": 1.7399518805726892, + "learning_rate": 4.000535875323307e-06, + "loss": 0.26201730966567993, + "step": 2914 + }, + { + "epoch": 1.4403805758062522, + "grad_norm": 1.3752450122135709, + "learning_rate": 3.994001076608833e-06, + "loss": 0.22517681121826172, + "step": 2915 + }, + { + "epoch": 1.4408748301000864, + "grad_norm": 1.2576751634156127, + "learning_rate": 3.9874702874134205e-06, + "loss": 0.25220564007759094, + "step": 2916 + }, + { + "epoch": 1.4413690843939206, + "grad_norm": 1.3128506030513347, + "learning_rate": 3.980943512096934e-06, + "loss": 0.23441332578659058, + "step": 2917 + }, + { + "epoch": 1.4418633386877548, + "grad_norm": 1.1616125895518352, + "learning_rate": 3.9744207550165625e-06, + "loss": 0.21659764647483826, + "step": 2918 + }, + { + "epoch": 1.442357592981589, + "grad_norm": 1.3726974417027011, + "learning_rate": 3.967902020526797e-06, + "loss": 0.21888667345046997, + "step": 2919 + }, + { + "epoch": 1.4428518472754233, + "grad_norm": 2.445936326011648, + "learning_rate": 3.961387312979454e-06, + "loss": 0.2771157920360565, + "step": 2920 + }, + { + "epoch": 1.4433461015692575, + "grad_norm": 1.312047281106489, + "learning_rate": 3.9548766367236605e-06, + "loss": 0.21376901865005493, + "step": 2921 + }, + { + "epoch": 1.4438403558630917, + "grad_norm": 1.4472763394283668, + "learning_rate": 3.948369996105849e-06, + "loss": 
0.2888128161430359, + "step": 2922 + }, + { + "epoch": 1.4443346101569257, + "grad_norm": 1.327788891714265, + "learning_rate": 3.941867395469761e-06, + "loss": 0.27809786796569824, + "step": 2923 + }, + { + "epoch": 1.4448288644507599, + "grad_norm": 1.377899507369851, + "learning_rate": 3.935368839156443e-06, + "loss": 0.2573625445365906, + "step": 2924 + }, + { + "epoch": 1.445323118744594, + "grad_norm": 1.5375959387987326, + "learning_rate": 3.928874331504232e-06, + "loss": 0.21472841501235962, + "step": 2925 + }, + { + "epoch": 1.4458173730384283, + "grad_norm": 1.2616393731465387, + "learning_rate": 3.922383876848771e-06, + "loss": 0.23214091360569, + "step": 2926 + }, + { + "epoch": 1.4463116273322625, + "grad_norm": 1.2717196020996628, + "learning_rate": 3.915897479522995e-06, + "loss": 0.23830139636993408, + "step": 2927 + }, + { + "epoch": 1.4468058816260967, + "grad_norm": 1.306053937449173, + "learning_rate": 3.909415143857132e-06, + "loss": 0.2519805431365967, + "step": 2928 + }, + { + "epoch": 1.4473001359199307, + "grad_norm": 1.3548983452054761, + "learning_rate": 3.9029368741786935e-06, + "loss": 0.2191445231437683, + "step": 2929 + }, + { + "epoch": 1.4477943902137649, + "grad_norm": 1.2448486288410623, + "learning_rate": 3.896462674812482e-06, + "loss": 0.2267228364944458, + "step": 2930 + }, + { + "epoch": 1.448288644507599, + "grad_norm": 1.3302096442776044, + "learning_rate": 3.88999255008058e-06, + "loss": 0.26456522941589355, + "step": 2931 + }, + { + "epoch": 1.4487828988014333, + "grad_norm": 1.3729869343228434, + "learning_rate": 3.883526504302353e-06, + "loss": 0.25602713227272034, + "step": 2932 + }, + { + "epoch": 1.4492771530952675, + "grad_norm": 1.9847312680384686, + "learning_rate": 3.877064541794435e-06, + "loss": 0.2545332610607147, + "step": 2933 + }, + { + "epoch": 1.4497714073891017, + "grad_norm": 1.3785644388388194, + "learning_rate": 3.87060666687074e-06, + "loss": 0.2846388816833496, + "step": 2934 + }, + { + "epoch": 
1.450265661682936, + "grad_norm": 1.4353094721790403, + "learning_rate": 3.864152883842461e-06, + "loss": 0.2686496376991272, + "step": 2935 + }, + { + "epoch": 1.4507599159767701, + "grad_norm": 1.2943779410551872, + "learning_rate": 3.857703197018044e-06, + "loss": 0.2712322473526001, + "step": 2936 + }, + { + "epoch": 1.4512541702706043, + "grad_norm": 1.3542096863749147, + "learning_rate": 3.851257610703209e-06, + "loss": 0.23492589592933655, + "step": 2937 + }, + { + "epoch": 1.4517484245644385, + "grad_norm": 1.2747230322582852, + "learning_rate": 3.84481612920094e-06, + "loss": 0.274332731962204, + "step": 2938 + }, + { + "epoch": 1.4522426788582725, + "grad_norm": 1.4107112786506069, + "learning_rate": 3.838378756811475e-06, + "loss": 0.250995010137558, + "step": 2939 + }, + { + "epoch": 1.4527369331521067, + "grad_norm": 1.3749429977256393, + "learning_rate": 3.831945497832313e-06, + "loss": 0.25221261382102966, + "step": 2940 + }, + { + "epoch": 1.453231187445941, + "grad_norm": 1.4826415922959744, + "learning_rate": 3.825516356558211e-06, + "loss": 0.2549906075000763, + "step": 2941 + }, + { + "epoch": 1.4537254417397751, + "grad_norm": 1.296751596925164, + "learning_rate": 3.819091337281158e-06, + "loss": 0.2369248867034912, + "step": 2942 + }, + { + "epoch": 1.4542196960336093, + "grad_norm": 1.3057816538242708, + "learning_rate": 3.8126704442904182e-06, + "loss": 0.23681433498859406, + "step": 2943 + }, + { + "epoch": 1.4547139503274436, + "grad_norm": 1.237019268284654, + "learning_rate": 3.806253681872486e-06, + "loss": 0.24966523051261902, + "step": 2944 + }, + { + "epoch": 1.4552082046212775, + "grad_norm": 1.4768369352256168, + "learning_rate": 3.7998410543110954e-06, + "loss": 0.28130626678466797, + "step": 2945 + }, + { + "epoch": 1.4557024589151117, + "grad_norm": 1.3443210173277784, + "learning_rate": 3.7934325658872275e-06, + "loss": 0.2725732922554016, + "step": 2946 + }, + { + "epoch": 1.456196713208946, + "grad_norm": 1.3345618379823432, 
+ "learning_rate": 3.7870282208790976e-06, + "loss": 0.23695361614227295, + "step": 2947 + }, + { + "epoch": 1.4566909675027802, + "grad_norm": 1.3094683367768178, + "learning_rate": 3.780628023562154e-06, + "loss": 0.2556610405445099, + "step": 2948 + }, + { + "epoch": 1.4571852217966144, + "grad_norm": 1.29841880424943, + "learning_rate": 3.7742319782090786e-06, + "loss": 0.26012274622917175, + "step": 2949 + }, + { + "epoch": 1.4576794760904486, + "grad_norm": 1.4612114957138427, + "learning_rate": 3.7678400890897827e-06, + "loss": 0.23788896203041077, + "step": 2950 + }, + { + "epoch": 1.4581737303842828, + "grad_norm": 1.4390155766896275, + "learning_rate": 3.7614523604713894e-06, + "loss": 0.2927572727203369, + "step": 2951 + }, + { + "epoch": 1.458667984678117, + "grad_norm": 1.2435143086118214, + "learning_rate": 3.75506879661827e-06, + "loss": 0.2254970222711563, + "step": 2952 + }, + { + "epoch": 1.4591622389719512, + "grad_norm": 1.2816222898303182, + "learning_rate": 3.7486894017919883e-06, + "loss": 0.216854065656662, + "step": 2953 + }, + { + "epoch": 1.4596564932657852, + "grad_norm": 1.1833481657982283, + "learning_rate": 3.7423141802513417e-06, + "loss": 0.2505137026309967, + "step": 2954 + }, + { + "epoch": 1.4601507475596194, + "grad_norm": 1.2187582021965486, + "learning_rate": 3.735943136252337e-06, + "loss": 0.19780108332633972, + "step": 2955 + }, + { + "epoch": 1.4606450018534536, + "grad_norm": 1.482633837182769, + "learning_rate": 3.7295762740481923e-06, + "loss": 0.26869216561317444, + "step": 2956 + }, + { + "epoch": 1.4611392561472878, + "grad_norm": 1.4121232274028632, + "learning_rate": 3.7232135978893336e-06, + "loss": 0.28265517950057983, + "step": 2957 + }, + { + "epoch": 1.461633510441122, + "grad_norm": 1.268342410891318, + "learning_rate": 3.7168551120233965e-06, + "loss": 0.2381918877363205, + "step": 2958 + }, + { + "epoch": 1.4621277647349562, + "grad_norm": 1.3343795310746396, + "learning_rate": 3.710500820695203e-06, + 
"loss": 0.27194735407829285, + "step": 2959 + }, + { + "epoch": 1.4626220190287902, + "grad_norm": 1.419071318428777, + "learning_rate": 3.7041507281468e-06, + "loss": 0.2611599266529083, + "step": 2960 + }, + { + "epoch": 1.4631162733226244, + "grad_norm": 1.3417831313824735, + "learning_rate": 3.697804838617418e-06, + "loss": 0.2970972955226898, + "step": 2961 + }, + { + "epoch": 1.4636105276164586, + "grad_norm": 1.3986503652920064, + "learning_rate": 3.6914631563434743e-06, + "loss": 0.24313557147979736, + "step": 2962 + }, + { + "epoch": 1.4641047819102928, + "grad_norm": 1.21693161859368, + "learning_rate": 3.685125685558587e-06, + "loss": 0.23243792355060577, + "step": 2963 + }, + { + "epoch": 1.464599036204127, + "grad_norm": 1.384655578733909, + "learning_rate": 3.6787924304935696e-06, + "loss": 0.2850711941719055, + "step": 2964 + }, + { + "epoch": 1.4650932904979612, + "grad_norm": 1.2938153090671698, + "learning_rate": 3.6724633953764023e-06, + "loss": 0.26217392086982727, + "step": 2965 + }, + { + "epoch": 1.4655875447917954, + "grad_norm": 1.3004956100522334, + "learning_rate": 3.666138584432264e-06, + "loss": 0.24623268842697144, + "step": 2966 + }, + { + "epoch": 1.4660817990856296, + "grad_norm": 1.2765502382143128, + "learning_rate": 3.6598180018835063e-06, + "loss": 0.25010040402412415, + "step": 2967 + }, + { + "epoch": 1.4665760533794638, + "grad_norm": 1.2806642930208934, + "learning_rate": 3.6535016519496603e-06, + "loss": 0.24471378326416016, + "step": 2968 + }, + { + "epoch": 1.467070307673298, + "grad_norm": 1.4411992818002375, + "learning_rate": 3.6471895388474323e-06, + "loss": 0.2845621109008789, + "step": 2969 + }, + { + "epoch": 1.467564561967132, + "grad_norm": 1.394997312403621, + "learning_rate": 3.640881666790699e-06, + "loss": 0.26768919825553894, + "step": 2970 + }, + { + "epoch": 1.4680588162609662, + "grad_norm": 1.3707198305280583, + "learning_rate": 3.6345780399904983e-06, + "loss": 0.27386170625686646, + "step": 2971 + }, + 
{ + "epoch": 1.4685530705548004, + "grad_norm": 1.2413908046529407, + "learning_rate": 3.628278662655055e-06, + "loss": 0.259655237197876, + "step": 2972 + }, + { + "epoch": 1.4690473248486347, + "grad_norm": 1.2328404027424946, + "learning_rate": 3.6219835389897305e-06, + "loss": 0.2234620749950409, + "step": 2973 + }, + { + "epoch": 1.4695415791424689, + "grad_norm": 1.2170225214049992, + "learning_rate": 3.6156926731970664e-06, + "loss": 0.25133174657821655, + "step": 2974 + }, + { + "epoch": 1.4700358334363028, + "grad_norm": 1.4753631122763826, + "learning_rate": 3.609406069476752e-06, + "loss": 0.2856005132198334, + "step": 2975 + }, + { + "epoch": 1.470530087730137, + "grad_norm": 1.352763052735898, + "learning_rate": 3.603123732025635e-06, + "loss": 0.23760217428207397, + "step": 2976 + }, + { + "epoch": 1.4710243420239713, + "grad_norm": 1.315945468844056, + "learning_rate": 3.596845665037715e-06, + "loss": 0.2344968169927597, + "step": 2977 + }, + { + "epoch": 1.4715185963178055, + "grad_norm": 1.3513242562279373, + "learning_rate": 3.5905718727041415e-06, + "loss": 0.23936885595321655, + "step": 2978 + }, + { + "epoch": 1.4720128506116397, + "grad_norm": 1.2281537442777626, + "learning_rate": 3.584302359213204e-06, + "loss": 0.24542436003684998, + "step": 2979 + }, + { + "epoch": 1.4725071049054739, + "grad_norm": 1.2816242991916544, + "learning_rate": 3.578037128750338e-06, + "loss": 0.24754226207733154, + "step": 2980 + }, + { + "epoch": 1.473001359199308, + "grad_norm": 1.3406109779820896, + "learning_rate": 3.5717761854981335e-06, + "loss": 0.25167495012283325, + "step": 2981 + }, + { + "epoch": 1.4734956134931423, + "grad_norm": 1.2820406301810907, + "learning_rate": 3.565519533636296e-06, + "loss": 0.21352116763591766, + "step": 2982 + }, + { + "epoch": 1.4739898677869765, + "grad_norm": 1.5800404779419173, + "learning_rate": 3.5592671773416798e-06, + "loss": 0.24721838533878326, + "step": 2983 + }, + { + "epoch": 1.4744841220808107, + "grad_norm": 
1.209332122723965, + "learning_rate": 3.5530191207882705e-06, + "loss": 0.2098400741815567, + "step": 2984 + }, + { + "epoch": 1.4749783763746447, + "grad_norm": 1.4059961620340085, + "learning_rate": 3.5467753681471784e-06, + "loss": 0.27138370275497437, + "step": 2985 + }, + { + "epoch": 1.475472630668479, + "grad_norm": 1.456553871591733, + "learning_rate": 3.5405359235866468e-06, + "loss": 0.2675255537033081, + "step": 2986 + }, + { + "epoch": 1.475966884962313, + "grad_norm": 1.3852192514849078, + "learning_rate": 3.5343007912720397e-06, + "loss": 0.2927984893321991, + "step": 2987 + }, + { + "epoch": 1.4764611392561473, + "grad_norm": 1.4840757807353469, + "learning_rate": 3.5280699753658354e-06, + "loss": 0.2897256910800934, + "step": 2988 + }, + { + "epoch": 1.4769553935499815, + "grad_norm": 1.3162511876956198, + "learning_rate": 3.521843480027646e-06, + "loss": 0.25903570652008057, + "step": 2989 + }, + { + "epoch": 1.4774496478438157, + "grad_norm": 1.1815962199969574, + "learning_rate": 3.515621309414191e-06, + "loss": 0.2097684144973755, + "step": 2990 + }, + { + "epoch": 1.4779439021376497, + "grad_norm": 1.368257943211956, + "learning_rate": 3.5094034676792952e-06, + "loss": 0.25807827711105347, + "step": 2991 + }, + { + "epoch": 1.478438156431484, + "grad_norm": 1.3326288392160186, + "learning_rate": 3.503189958973906e-06, + "loss": 0.24161803722381592, + "step": 2992 + }, + { + "epoch": 1.4789324107253181, + "grad_norm": 1.3735233821721475, + "learning_rate": 3.4969807874460717e-06, + "loss": 0.2612338364124298, + "step": 2993 + }, + { + "epoch": 1.4794266650191523, + "grad_norm": 1.3484776453875857, + "learning_rate": 3.490775957240947e-06, + "loss": 0.2529192566871643, + "step": 2994 + }, + { + "epoch": 1.4799209193129865, + "grad_norm": 1.376626480795096, + "learning_rate": 3.4845754725007883e-06, + "loss": 0.2616920471191406, + "step": 2995 + }, + { + "epoch": 1.4804151736068207, + "grad_norm": 1.1709509708234012, + "learning_rate": 
3.4783793373649534e-06, + "loss": 0.2372770607471466, + "step": 2996 + }, + { + "epoch": 1.480909427900655, + "grad_norm": 1.6683733615888718, + "learning_rate": 3.4721875559698826e-06, + "loss": 0.2993369996547699, + "step": 2997 + }, + { + "epoch": 1.4814036821944891, + "grad_norm": 1.444631738912031, + "learning_rate": 3.4660001324491354e-06, + "loss": 0.2703147530555725, + "step": 2998 + }, + { + "epoch": 1.4818979364883234, + "grad_norm": 1.497851135078702, + "learning_rate": 3.459817070933337e-06, + "loss": 0.2909662425518036, + "step": 2999 + }, + { + "epoch": 1.4823921907821573, + "grad_norm": 1.4957339087199897, + "learning_rate": 3.4536383755502146e-06, + "loss": 0.2620519697666168, + "step": 3000 + }, + { + "epoch": 1.4828864450759915, + "grad_norm": 1.4607702963487426, + "learning_rate": 3.447464050424576e-06, + "loss": 0.2740327715873718, + "step": 3001 + }, + { + "epoch": 1.4833806993698258, + "grad_norm": 1.4051737005514326, + "learning_rate": 3.441294099678314e-06, + "loss": 0.2597920000553131, + "step": 3002 + }, + { + "epoch": 1.48387495366366, + "grad_norm": 1.2931150222772085, + "learning_rate": 3.435128527430397e-06, + "loss": 0.23138844966888428, + "step": 3003 + }, + { + "epoch": 1.4843692079574942, + "grad_norm": 1.4678522965018421, + "learning_rate": 3.428967337796879e-06, + "loss": 0.26457998156547546, + "step": 3004 + }, + { + "epoch": 1.4848634622513284, + "grad_norm": 1.3435199008351797, + "learning_rate": 3.4228105348908703e-06, + "loss": 0.22283414006233215, + "step": 3005 + }, + { + "epoch": 1.4853577165451624, + "grad_norm": 1.404722725472706, + "learning_rate": 3.416658122822576e-06, + "loss": 0.26169392466545105, + "step": 3006 + }, + { + "epoch": 1.4858519708389966, + "grad_norm": 1.3942121909077798, + "learning_rate": 3.4105101056992574e-06, + "loss": 0.22738765180110931, + "step": 3007 + }, + { + "epoch": 1.4863462251328308, + "grad_norm": 1.640113120385147, + "learning_rate": 3.404366487625237e-06, + "loss": 
0.24252702295780182, + "step": 3008 + }, + { + "epoch": 1.486840479426665, + "grad_norm": 1.2658350422978366, + "learning_rate": 3.398227272701905e-06, + "loss": 0.2192659229040146, + "step": 3009 + }, + { + "epoch": 1.4873347337204992, + "grad_norm": 1.3659525117305242, + "learning_rate": 3.3920924650277253e-06, + "loss": 0.23824100196361542, + "step": 3010 + }, + { + "epoch": 1.4878289880143334, + "grad_norm": 1.304246601014088, + "learning_rate": 3.3859620686981977e-06, + "loss": 0.25558948516845703, + "step": 3011 + }, + { + "epoch": 1.4883232423081676, + "grad_norm": 1.2977660969069507, + "learning_rate": 3.3798360878058887e-06, + "loss": 0.23521414399147034, + "step": 3012 + }, + { + "epoch": 1.4888174966020018, + "grad_norm": 1.5059732923775448, + "learning_rate": 3.373714526440417e-06, + "loss": 0.26024043560028076, + "step": 3013 + }, + { + "epoch": 1.489311750895836, + "grad_norm": 1.3966534942487767, + "learning_rate": 3.3675973886884506e-06, + "loss": 0.2676945626735687, + "step": 3014 + }, + { + "epoch": 1.4898060051896702, + "grad_norm": 1.4302757106543351, + "learning_rate": 3.361484678633701e-06, + "loss": 0.29499778151512146, + "step": 3015 + }, + { + "epoch": 1.4903002594835042, + "grad_norm": 1.2541194356509255, + "learning_rate": 3.35537640035693e-06, + "loss": 0.21667227149009705, + "step": 3016 + }, + { + "epoch": 1.4907945137773384, + "grad_norm": 1.5055716214820787, + "learning_rate": 3.3492725579359288e-06, + "loss": 0.2852727770805359, + "step": 3017 + }, + { + "epoch": 1.4912887680711726, + "grad_norm": 1.3110566349547437, + "learning_rate": 3.343173155445546e-06, + "loss": 0.22535362839698792, + "step": 3018 + }, + { + "epoch": 1.4917830223650068, + "grad_norm": 1.3390943365322368, + "learning_rate": 3.3370781969576473e-06, + "loss": 0.23513402044773102, + "step": 3019 + }, + { + "epoch": 1.492277276658841, + "grad_norm": 1.34171251218287, + "learning_rate": 3.3309876865411426e-06, + "loss": 0.2343328893184662, + "step": 3020 + }, + { + 
"epoch": 1.4927715309526752, + "grad_norm": 1.4982279835949508, + "learning_rate": 3.3249016282619696e-06, + "loss": 0.309964656829834, + "step": 3021 + }, + { + "epoch": 1.4932657852465092, + "grad_norm": 1.4104830526650916, + "learning_rate": 3.318820026183095e-06, + "loss": 0.2678214907646179, + "step": 3022 + }, + { + "epoch": 1.4937600395403434, + "grad_norm": 1.3871314289257326, + "learning_rate": 3.312742884364508e-06, + "loss": 0.24117907881736755, + "step": 3023 + }, + { + "epoch": 1.4942542938341776, + "grad_norm": 1.4966526123322192, + "learning_rate": 3.306670206863225e-06, + "loss": 0.23572009801864624, + "step": 3024 + }, + { + "epoch": 1.4947485481280118, + "grad_norm": 1.1974970903692888, + "learning_rate": 3.3006019977332728e-06, + "loss": 0.20058652758598328, + "step": 3025 + }, + { + "epoch": 1.495242802421846, + "grad_norm": 1.4552709446661256, + "learning_rate": 3.2945382610257017e-06, + "loss": 0.2433123141527176, + "step": 3026 + }, + { + "epoch": 1.4957370567156802, + "grad_norm": 1.330592869585441, + "learning_rate": 3.2884790007885834e-06, + "loss": 0.2648032009601593, + "step": 3027 + }, + { + "epoch": 1.4962313110095145, + "grad_norm": 1.4274009022113794, + "learning_rate": 3.2824242210669853e-06, + "loss": 0.23508986830711365, + "step": 3028 + }, + { + "epoch": 1.4967255653033487, + "grad_norm": 1.337116326245031, + "learning_rate": 3.2763739259029946e-06, + "loss": 0.2340327799320221, + "step": 3029 + }, + { + "epoch": 1.4972198195971829, + "grad_norm": 1.4724312525996526, + "learning_rate": 3.2703281193357028e-06, + "loss": 0.24071671068668365, + "step": 3030 + }, + { + "epoch": 1.4977140738910169, + "grad_norm": 1.4191732736253682, + "learning_rate": 3.264286805401203e-06, + "loss": 0.26332271099090576, + "step": 3031 + }, + { + "epoch": 1.498208328184851, + "grad_norm": 1.266600605298302, + "learning_rate": 3.2582499881325904e-06, + "loss": 0.21818014979362488, + "step": 3032 + }, + { + "epoch": 1.4987025824786853, + "grad_norm": 
1.3340246980776698, + "learning_rate": 3.2522176715599606e-06, + "loss": 0.26997917890548706, + "step": 3033 + }, + { + "epoch": 1.4991968367725195, + "grad_norm": 1.4818331950802985, + "learning_rate": 3.2461898597103935e-06, + "loss": 0.21703608334064484, + "step": 3034 + }, + { + "epoch": 1.4996910910663537, + "grad_norm": 1.287764216628678, + "learning_rate": 3.240166556607979e-06, + "loss": 0.24345526099205017, + "step": 3035 + }, + { + "epoch": 1.5001853453601877, + "grad_norm": 1.2134455175661707, + "learning_rate": 3.2341477662737877e-06, + "loss": 0.2428402602672577, + "step": 3036 + }, + { + "epoch": 1.5006795996540219, + "grad_norm": 1.389226279044202, + "learning_rate": 3.228133492725872e-06, + "loss": 0.234619602560997, + "step": 3037 + }, + { + "epoch": 1.501173853947856, + "grad_norm": 1.3308420188359134, + "learning_rate": 3.2221237399792784e-06, + "loss": 0.27995944023132324, + "step": 3038 + }, + { + "epoch": 1.5016681082416903, + "grad_norm": 1.283844133259085, + "learning_rate": 3.2161185120460327e-06, + "loss": 0.23708665370941162, + "step": 3039 + }, + { + "epoch": 1.5021623625355245, + "grad_norm": 1.3268773172813266, + "learning_rate": 3.2101178129351373e-06, + "loss": 0.2541486620903015, + "step": 3040 + }, + { + "epoch": 1.5026566168293587, + "grad_norm": 1.2735534589560005, + "learning_rate": 3.204121646652576e-06, + "loss": 0.2281494140625, + "step": 3041 + }, + { + "epoch": 1.503150871123193, + "grad_norm": 1.4214183804465141, + "learning_rate": 3.1981300172013006e-06, + "loss": 0.24793995916843414, + "step": 3042 + }, + { + "epoch": 1.503645125417027, + "grad_norm": 1.3820844339773122, + "learning_rate": 3.19214292858124e-06, + "loss": 0.25877612829208374, + "step": 3043 + }, + { + "epoch": 1.5041393797108613, + "grad_norm": 1.2606638362034603, + "learning_rate": 3.1861603847892907e-06, + "loss": 0.23822908103466034, + "step": 3044 + }, + { + "epoch": 1.5046336340046955, + "grad_norm": 1.3375723790086107, + "learning_rate": 
3.1801823898193075e-06, + "loss": 0.2450297623872757, + "step": 3045 + }, + { + "epoch": 1.5051278882985297, + "grad_norm": 1.291286771303469, + "learning_rate": 3.1742089476621176e-06, + "loss": 0.23657044768333435, + "step": 3046 + }, + { + "epoch": 1.505622142592364, + "grad_norm": 1.330327819651038, + "learning_rate": 3.1682400623055043e-06, + "loss": 0.22040539979934692, + "step": 3047 + }, + { + "epoch": 1.506116396886198, + "grad_norm": 1.2295078748580162, + "learning_rate": 3.162275737734213e-06, + "loss": 0.24671347439289093, + "step": 3048 + }, + { + "epoch": 1.5066106511800321, + "grad_norm": 1.3193055288047242, + "learning_rate": 3.156315977929939e-06, + "loss": 0.2590971291065216, + "step": 3049 + }, + { + "epoch": 1.5071049054738663, + "grad_norm": 1.3201796395435559, + "learning_rate": 3.1503607868713383e-06, + "loss": 0.2650923430919647, + "step": 3050 + }, + { + "epoch": 1.5075991597677005, + "grad_norm": 1.3124240495866886, + "learning_rate": 3.1444101685339987e-06, + "loss": 0.22146420180797577, + "step": 3051 + }, + { + "epoch": 1.5080934140615345, + "grad_norm": 1.3875424644692997, + "learning_rate": 3.1384641268904804e-06, + "loss": 0.26743125915527344, + "step": 3052 + }, + { + "epoch": 1.5085876683553687, + "grad_norm": 1.4406215302595167, + "learning_rate": 3.1325226659102746e-06, + "loss": 0.24730908870697021, + "step": 3053 + }, + { + "epoch": 1.509081922649203, + "grad_norm": 1.3933207280707873, + "learning_rate": 3.1265857895598094e-06, + "loss": 0.26301079988479614, + "step": 3054 + }, + { + "epoch": 1.5095761769430371, + "grad_norm": 1.2589035946994764, + "learning_rate": 3.1206535018024598e-06, + "loss": 0.22815877199172974, + "step": 3055 + }, + { + "epoch": 1.5100704312368713, + "grad_norm": 1.533757049437193, + "learning_rate": 3.114725806598544e-06, + "loss": 0.25178754329681396, + "step": 3056 + }, + { + "epoch": 1.5105646855307056, + "grad_norm": 1.3661154596053653, + "learning_rate": 3.1088027079052973e-06, + "loss": 
0.20269548892974854, + "step": 3057 + }, + { + "epoch": 1.5110589398245398, + "grad_norm": 1.4014331356202114, + "learning_rate": 3.1028842096769006e-06, + "loss": 0.25972461700439453, + "step": 3058 + }, + { + "epoch": 1.511553194118374, + "grad_norm": 1.3745096869790834, + "learning_rate": 3.0969703158644583e-06, + "loss": 0.23313641548156738, + "step": 3059 + }, + { + "epoch": 1.5120474484122082, + "grad_norm": 1.2941298023610517, + "learning_rate": 3.0910610304159993e-06, + "loss": 0.2359476238489151, + "step": 3060 + }, + { + "epoch": 1.5125417027060424, + "grad_norm": 1.3631605592123968, + "learning_rate": 3.085156357276481e-06, + "loss": 0.263039767742157, + "step": 3061 + }, + { + "epoch": 1.5130359569998766, + "grad_norm": 1.4414947958352682, + "learning_rate": 3.0792563003877795e-06, + "loss": 0.2222701609134674, + "step": 3062 + }, + { + "epoch": 1.5135302112937106, + "grad_norm": 1.5152386602086467, + "learning_rate": 3.0733608636886815e-06, + "loss": 0.2511240839958191, + "step": 3063 + }, + { + "epoch": 1.5140244655875448, + "grad_norm": 1.3426863589238012, + "learning_rate": 3.0674700511149057e-06, + "loss": 0.26376873254776, + "step": 3064 + }, + { + "epoch": 1.514518719881379, + "grad_norm": 1.50705834278763, + "learning_rate": 3.0615838665990685e-06, + "loss": 0.2883176803588867, + "step": 3065 + }, + { + "epoch": 1.5150129741752132, + "grad_norm": 1.4534493774446482, + "learning_rate": 3.055702314070703e-06, + "loss": 0.2641439437866211, + "step": 3066 + }, + { + "epoch": 1.5155072284690472, + "grad_norm": 1.2206107550113217, + "learning_rate": 3.049825397456252e-06, + "loss": 0.22250229120254517, + "step": 3067 + }, + { + "epoch": 1.5160014827628814, + "grad_norm": 1.6917159383624243, + "learning_rate": 3.0439531206790585e-06, + "loss": 0.291684091091156, + "step": 3068 + }, + { + "epoch": 1.5164957370567156, + "grad_norm": 1.2582948861406589, + "learning_rate": 3.0380854876593725e-06, + "loss": 0.22581104934215546, + "step": 3069 + }, + { + 
"epoch": 1.5169899913505498, + "grad_norm": 1.3218689478609282, + "learning_rate": 3.032222502314345e-06, + "loss": 0.22701920568943024, + "step": 3070 + }, + { + "epoch": 1.517484245644384, + "grad_norm": 1.4011754473371674, + "learning_rate": 3.0263641685580134e-06, + "loss": 0.27151840925216675, + "step": 3071 + }, + { + "epoch": 1.5179784999382182, + "grad_norm": 1.4319870241234463, + "learning_rate": 3.0205104903013183e-06, + "loss": 0.25780510902404785, + "step": 3072 + }, + { + "epoch": 1.5184727542320524, + "grad_norm": 1.232949136662072, + "learning_rate": 3.014661471452103e-06, + "loss": 0.23905009031295776, + "step": 3073 + }, + { + "epoch": 1.5189670085258866, + "grad_norm": 1.296685135563547, + "learning_rate": 3.0088171159150758e-06, + "loss": 0.25984710454940796, + "step": 3074 + }, + { + "epoch": 1.5194612628197208, + "grad_norm": 1.5925440917505933, + "learning_rate": 3.0029774275918523e-06, + "loss": 0.24934321641921997, + "step": 3075 + }, + { + "epoch": 1.519955517113555, + "grad_norm": 1.3570253725800296, + "learning_rate": 2.997142410380921e-06, + "loss": 0.24181538820266724, + "step": 3076 + }, + { + "epoch": 1.5204497714073892, + "grad_norm": 1.4224922399256614, + "learning_rate": 2.9913120681776586e-06, + "loss": 0.28867265582084656, + "step": 3077 + }, + { + "epoch": 1.5209440257012234, + "grad_norm": 1.3689537883355085, + "learning_rate": 2.9854864048743183e-06, + "loss": 0.25082239508628845, + "step": 3078 + }, + { + "epoch": 1.5214382799950574, + "grad_norm": 1.1809552467181543, + "learning_rate": 2.979665424360031e-06, + "loss": 0.21152186393737793, + "step": 3079 + }, + { + "epoch": 1.5219325342888916, + "grad_norm": 1.3255328033562375, + "learning_rate": 2.9738491305207926e-06, + "loss": 0.22989922761917114, + "step": 3080 + }, + { + "epoch": 1.5224267885827258, + "grad_norm": 1.4352789035320561, + "learning_rate": 2.9680375272394855e-06, + "loss": 0.21606113016605377, + "step": 3081 + }, + { + "epoch": 1.5229210428765598, + 
"grad_norm": 1.2795767684328416, + "learning_rate": 2.962230618395855e-06, + "loss": 0.25060969591140747, + "step": 3082 + }, + { + "epoch": 1.523415297170394, + "grad_norm": 1.4409246111783223, + "learning_rate": 2.9564284078665016e-06, + "loss": 0.2574993371963501, + "step": 3083 + }, + { + "epoch": 1.5239095514642282, + "grad_norm": 1.3476850353049301, + "learning_rate": 2.9506308995249035e-06, + "loss": 0.2552590072154999, + "step": 3084 + }, + { + "epoch": 1.5244038057580624, + "grad_norm": 1.4294064187721107, + "learning_rate": 2.9448380972413936e-06, + "loss": 0.2356393188238144, + "step": 3085 + }, + { + "epoch": 1.5248980600518967, + "grad_norm": 1.2956637091449177, + "learning_rate": 2.939050004883164e-06, + "loss": 0.25111299753189087, + "step": 3086 + }, + { + "epoch": 1.5253923143457309, + "grad_norm": 1.6187968050107684, + "learning_rate": 2.933266626314263e-06, + "loss": 0.2713226079940796, + "step": 3087 + }, + { + "epoch": 1.525886568639565, + "grad_norm": 1.371480760416421, + "learning_rate": 2.92748796539559e-06, + "loss": 0.2493591606616974, + "step": 3088 + }, + { + "epoch": 1.5263808229333993, + "grad_norm": 1.3919253891743593, + "learning_rate": 2.9217140259848984e-06, + "loss": 0.2377934455871582, + "step": 3089 + }, + { + "epoch": 1.5268750772272335, + "grad_norm": 1.222188939870737, + "learning_rate": 2.9159448119367896e-06, + "loss": 0.23113523423671722, + "step": 3090 + }, + { + "epoch": 1.5273693315210677, + "grad_norm": 1.3071786210451368, + "learning_rate": 2.910180327102702e-06, + "loss": 0.2212657630443573, + "step": 3091 + }, + { + "epoch": 1.527863585814902, + "grad_norm": 1.4809706556535216, + "learning_rate": 2.904420575330923e-06, + "loss": 0.3317147195339203, + "step": 3092 + }, + { + "epoch": 1.528357840108736, + "grad_norm": 1.222501836116789, + "learning_rate": 2.8986655604665914e-06, + "loss": 0.21677865087985992, + "step": 3093 + }, + { + "epoch": 1.52885209440257, + "grad_norm": 1.4687657258901345, + "learning_rate": 
2.892915286351663e-06, + "loss": 0.2719038724899292, + "step": 3094 + }, + { + "epoch": 1.5293463486964043, + "grad_norm": 1.4800981330468082, + "learning_rate": 2.887169756824941e-06, + "loss": 0.2870655953884125, + "step": 3095 + }, + { + "epoch": 1.5298406029902385, + "grad_norm": 1.6050530390151894, + "learning_rate": 2.8814289757220636e-06, + "loss": 0.27370864152908325, + "step": 3096 + }, + { + "epoch": 1.5303348572840727, + "grad_norm": 1.2925821727625635, + "learning_rate": 2.8756929468754834e-06, + "loss": 0.24579623341560364, + "step": 3097 + }, + { + "epoch": 1.5308291115779067, + "grad_norm": 1.5466324939604184, + "learning_rate": 2.869961674114501e-06, + "loss": 0.25092196464538574, + "step": 3098 + }, + { + "epoch": 1.531323365871741, + "grad_norm": 1.539826368870157, + "learning_rate": 2.864235161265232e-06, + "loss": 0.29637211561203003, + "step": 3099 + }, + { + "epoch": 1.531817620165575, + "grad_norm": 1.346232107313421, + "learning_rate": 2.8585134121506086e-06, + "loss": 0.24216854572296143, + "step": 3100 + }, + { + "epoch": 1.5323118744594093, + "grad_norm": 1.264644352464564, + "learning_rate": 2.8527964305903887e-06, + "loss": 0.2050018608570099, + "step": 3101 + }, + { + "epoch": 1.5328061287532435, + "grad_norm": 1.4429594327267479, + "learning_rate": 2.8470842204011562e-06, + "loss": 0.2323600798845291, + "step": 3102 + }, + { + "epoch": 1.5333003830470777, + "grad_norm": 1.3588986581117766, + "learning_rate": 2.8413767853962937e-06, + "loss": 0.2582741379737854, + "step": 3103 + }, + { + "epoch": 1.533794637340912, + "grad_norm": 1.2503142010331656, + "learning_rate": 2.8356741293860034e-06, + "loss": 0.2190069705247879, + "step": 3104 + }, + { + "epoch": 1.5342888916347461, + "grad_norm": 1.2700906528895424, + "learning_rate": 2.8299762561773004e-06, + "loss": 0.2293972671031952, + "step": 3105 + }, + { + "epoch": 1.5347831459285803, + "grad_norm": 1.4604730845156306, + "learning_rate": 2.8242831695740004e-06, + "loss": 
0.28793102502822876, + "step": 3106 + }, + { + "epoch": 1.5352774002224145, + "grad_norm": 1.3871033704581968, + "learning_rate": 2.8185948733767276e-06, + "loss": 0.25700464844703674, + "step": 3107 + }, + { + "epoch": 1.5357716545162488, + "grad_norm": 1.6036334059609652, + "learning_rate": 2.8129113713829115e-06, + "loss": 0.2633448541164398, + "step": 3108 + }, + { + "epoch": 1.5362659088100827, + "grad_norm": 1.2623866770143863, + "learning_rate": 2.8072326673867667e-06, + "loss": 0.2363145351409912, + "step": 3109 + }, + { + "epoch": 1.536760163103917, + "grad_norm": 1.3073287831639788, + "learning_rate": 2.8015587651793273e-06, + "loss": 0.24324053525924683, + "step": 3110 + }, + { + "epoch": 1.5372544173977511, + "grad_norm": 1.445888976457047, + "learning_rate": 2.795889668548399e-06, + "loss": 0.24139198660850525, + "step": 3111 + }, + { + "epoch": 1.5377486716915854, + "grad_norm": 1.3070463104686283, + "learning_rate": 2.790225381278595e-06, + "loss": 0.2502334713935852, + "step": 3112 + }, + { + "epoch": 1.5382429259854193, + "grad_norm": 1.3233606598015195, + "learning_rate": 2.784565907151311e-06, + "loss": 0.24635109305381775, + "step": 3113 + }, + { + "epoch": 1.5387371802792535, + "grad_norm": 1.236974627125298, + "learning_rate": 2.7789112499447312e-06, + "loss": 0.2299586534500122, + "step": 3114 + }, + { + "epoch": 1.5392314345730878, + "grad_norm": 1.232633224868461, + "learning_rate": 2.7732614134338243e-06, + "loss": 0.2296627312898636, + "step": 3115 + }, + { + "epoch": 1.539725688866922, + "grad_norm": 1.3919487561893158, + "learning_rate": 2.767616401390343e-06, + "loss": 0.26127320528030396, + "step": 3116 + }, + { + "epoch": 1.5402199431607562, + "grad_norm": 1.3612758454379796, + "learning_rate": 2.761976217582808e-06, + "loss": 0.24718445539474487, + "step": 3117 + }, + { + "epoch": 1.5407141974545904, + "grad_norm": 1.3000063965271036, + "learning_rate": 2.7563408657765345e-06, + "loss": 0.22314362227916718, + "step": 3118 + }, + { + 
"epoch": 1.5412084517484246, + "grad_norm": 1.2190954536725822, + "learning_rate": 2.750710349733602e-06, + "loss": 0.2288416028022766, + "step": 3119 + }, + { + "epoch": 1.5417027060422588, + "grad_norm": 1.3774388084670495, + "learning_rate": 2.7450846732128577e-06, + "loss": 0.26181158423423767, + "step": 3120 + }, + { + "epoch": 1.542196960336093, + "grad_norm": 1.2123920647911897, + "learning_rate": 2.739463839969926e-06, + "loss": 0.22397834062576294, + "step": 3121 + }, + { + "epoch": 1.5426912146299272, + "grad_norm": 1.4361842348504215, + "learning_rate": 2.7338478537571943e-06, + "loss": 0.23633858561515808, + "step": 3122 + }, + { + "epoch": 1.5431854689237614, + "grad_norm": 1.402092217147563, + "learning_rate": 2.7282367183238143e-06, + "loss": 0.26719149947166443, + "step": 3123 + }, + { + "epoch": 1.5436797232175956, + "grad_norm": 1.5260713360749147, + "learning_rate": 2.722630437415701e-06, + "loss": 0.2882165014743805, + "step": 3124 + }, + { + "epoch": 1.5441739775114296, + "grad_norm": 1.258294682394544, + "learning_rate": 2.7170290147755285e-06, + "loss": 0.2377905696630478, + "step": 3125 + }, + { + "epoch": 1.5446682318052638, + "grad_norm": 1.3195147017546947, + "learning_rate": 2.7114324541427193e-06, + "loss": 0.2705368399620056, + "step": 3126 + }, + { + "epoch": 1.545162486099098, + "grad_norm": 1.2857701503132921, + "learning_rate": 2.7058407592534663e-06, + "loss": 0.246593177318573, + "step": 3127 + }, + { + "epoch": 1.5456567403929322, + "grad_norm": 1.33265619524068, + "learning_rate": 2.700253933840705e-06, + "loss": 0.2339816391468048, + "step": 3128 + }, + { + "epoch": 1.5461509946867662, + "grad_norm": 1.3254997645322988, + "learning_rate": 2.6946719816341127e-06, + "loss": 0.2727898359298706, + "step": 3129 + }, + { + "epoch": 1.5466452489806004, + "grad_norm": 1.483440007746236, + "learning_rate": 2.6890949063601255e-06, + "loss": 0.285343736410141, + "step": 3130 + }, + { + "epoch": 1.5471395032744346, + "grad_norm": 
1.4219498161281177, + "learning_rate": 2.6835227117419184e-06, + "loss": 0.25782397389411926, + "step": 3131 + }, + { + "epoch": 1.5476337575682688, + "grad_norm": 1.4096561970820742, + "learning_rate": 2.67795540149941e-06, + "loss": 0.26677054166793823, + "step": 3132 + }, + { + "epoch": 1.548128011862103, + "grad_norm": 1.375758748898483, + "learning_rate": 2.6723929793492555e-06, + "loss": 0.2696993052959442, + "step": 3133 + }, + { + "epoch": 1.5486222661559372, + "grad_norm": 1.3214248540646165, + "learning_rate": 2.66683544900485e-06, + "loss": 0.2536013424396515, + "step": 3134 + }, + { + "epoch": 1.5491165204497714, + "grad_norm": 1.352660590997614, + "learning_rate": 2.661282814176319e-06, + "loss": 0.2583885192871094, + "step": 3135 + }, + { + "epoch": 1.5496107747436056, + "grad_norm": 1.3555750519784333, + "learning_rate": 2.655735078570528e-06, + "loss": 0.24341340363025665, + "step": 3136 + }, + { + "epoch": 1.5501050290374399, + "grad_norm": 1.3694743065317843, + "learning_rate": 2.650192245891059e-06, + "loss": 0.2575637698173523, + "step": 3137 + }, + { + "epoch": 1.550599283331274, + "grad_norm": 1.3743479794773286, + "learning_rate": 2.644654319838227e-06, + "loss": 0.24109753966331482, + "step": 3138 + }, + { + "epoch": 1.5510935376251083, + "grad_norm": 1.2822421062589742, + "learning_rate": 2.6391213041090822e-06, + "loss": 0.246525377035141, + "step": 3139 + }, + { + "epoch": 1.5515877919189422, + "grad_norm": 1.3144657839500415, + "learning_rate": 2.6335932023973777e-06, + "loss": 0.2589566111564636, + "step": 3140 + }, + { + "epoch": 1.5520820462127765, + "grad_norm": 1.333811387247849, + "learning_rate": 2.628070018393598e-06, + "loss": 0.26198744773864746, + "step": 3141 + }, + { + "epoch": 1.5525763005066107, + "grad_norm": 1.2808916237604833, + "learning_rate": 2.622551755784942e-06, + "loss": 0.22991782426834106, + "step": 3142 + }, + { + "epoch": 1.5530705548004449, + "grad_norm": 1.242582313641482, + "learning_rate": 
2.6170384182553244e-06, + "loss": 0.22211629152297974, + "step": 3143 + }, + { + "epoch": 1.5535648090942789, + "grad_norm": 1.306994517774283, + "learning_rate": 2.6115300094853666e-06, + "loss": 0.2665289640426636, + "step": 3144 + }, + { + "epoch": 1.554059063388113, + "grad_norm": 1.260713008188702, + "learning_rate": 2.6060265331524114e-06, + "loss": 0.20211085677146912, + "step": 3145 + }, + { + "epoch": 1.5545533176819473, + "grad_norm": 1.3930467289400041, + "learning_rate": 2.6005279929304918e-06, + "loss": 0.24264919757843018, + "step": 3146 + }, + { + "epoch": 1.5550475719757815, + "grad_norm": 1.316241217623005, + "learning_rate": 2.595034392490354e-06, + "loss": 0.2722601294517517, + "step": 3147 + }, + { + "epoch": 1.5555418262696157, + "grad_norm": 1.3463437829147908, + "learning_rate": 2.58954573549946e-06, + "loss": 0.26061201095581055, + "step": 3148 + }, + { + "epoch": 1.5560360805634499, + "grad_norm": 1.3701131034296847, + "learning_rate": 2.5840620256219464e-06, + "loss": 0.20620305836200714, + "step": 3149 + }, + { + "epoch": 1.556530334857284, + "grad_norm": 1.3323948648350379, + "learning_rate": 2.578583266518664e-06, + "loss": 0.2424723207950592, + "step": 3150 + }, + { + "epoch": 1.5570245891511183, + "grad_norm": 1.4286998078779003, + "learning_rate": 2.573109461847153e-06, + "loss": 0.248019739985466, + "step": 3151 + }, + { + "epoch": 1.5575188434449525, + "grad_norm": 1.2753051030343154, + "learning_rate": 2.5676406152616483e-06, + "loss": 0.23162522912025452, + "step": 3152 + }, + { + "epoch": 1.5580130977387867, + "grad_norm": 1.6072180292151754, + "learning_rate": 2.562176730413074e-06, + "loss": 0.20099176466464996, + "step": 3153 + }, + { + "epoch": 1.558507352032621, + "grad_norm": 1.4868098360756863, + "learning_rate": 2.5567178109490433e-06, + "loss": 0.27957430481910706, + "step": 3154 + }, + { + "epoch": 1.5590016063264551, + "grad_norm": 1.248830156095604, + "learning_rate": 2.551263860513845e-06, + "loss": 
0.23941464722156525, + "step": 3155 + }, + { + "epoch": 1.559495860620289, + "grad_norm": 1.4371594834198067, + "learning_rate": 2.5458148827484695e-06, + "loss": 0.24910275638103485, + "step": 3156 + }, + { + "epoch": 1.5599901149141233, + "grad_norm": 1.325153365111165, + "learning_rate": 2.540370881290568e-06, + "loss": 0.26430344581604004, + "step": 3157 + }, + { + "epoch": 1.5604843692079575, + "grad_norm": 1.419775898075986, + "learning_rate": 2.534931859774481e-06, + "loss": 0.2833614945411682, + "step": 3158 + }, + { + "epoch": 1.5609786235017915, + "grad_norm": 1.2863995969426358, + "learning_rate": 2.5294978218312215e-06, + "loss": 0.24630708992481232, + "step": 3159 + }, + { + "epoch": 1.5614728777956257, + "grad_norm": 1.398973984381973, + "learning_rate": 2.524068771088476e-06, + "loss": 0.2674857974052429, + "step": 3160 + }, + { + "epoch": 1.56196713208946, + "grad_norm": 1.34356245737179, + "learning_rate": 2.5186447111706005e-06, + "loss": 0.23531441390514374, + "step": 3161 + }, + { + "epoch": 1.5624613863832941, + "grad_norm": 1.2374731185400574, + "learning_rate": 2.5132256456986236e-06, + "loss": 0.2603223919868469, + "step": 3162 + }, + { + "epoch": 1.5629556406771283, + "grad_norm": 1.302457785178724, + "learning_rate": 2.5078115782902267e-06, + "loss": 0.220007985830307, + "step": 3163 + }, + { + "epoch": 1.5634498949709625, + "grad_norm": 1.36046018530454, + "learning_rate": 2.502402512559773e-06, + "loss": 0.22660651803016663, + "step": 3164 + }, + { + "epoch": 1.5639441492647967, + "grad_norm": 1.4627286861974862, + "learning_rate": 2.4969984521182766e-06, + "loss": 0.26425695419311523, + "step": 3165 + }, + { + "epoch": 1.564438403558631, + "grad_norm": 1.3019070428865334, + "learning_rate": 2.4915994005734057e-06, + "loss": 0.22870787978172302, + "step": 3166 + }, + { + "epoch": 1.5649326578524652, + "grad_norm": 1.2622414815912377, + "learning_rate": 2.48620536152949e-06, + "loss": 0.25734084844589233, + "step": 3167 + }, + { + 
"epoch": 1.5654269121462994, + "grad_norm": 1.2954820564672134, + "learning_rate": 2.4808163385875226e-06, + "loss": 0.24831843376159668, + "step": 3168 + }, + { + "epoch": 1.5659211664401336, + "grad_norm": 1.3356720372460569, + "learning_rate": 2.4754323353451284e-06, + "loss": 0.2389685958623886, + "step": 3169 + }, + { + "epoch": 1.5664154207339678, + "grad_norm": 1.33182477221405, + "learning_rate": 2.4700533553965946e-06, + "loss": 0.24750663340091705, + "step": 3170 + }, + { + "epoch": 1.5669096750278018, + "grad_norm": 1.4158946259185428, + "learning_rate": 2.4646794023328525e-06, + "loss": 0.2689003348350525, + "step": 3171 + }, + { + "epoch": 1.567403929321636, + "grad_norm": 1.32371836304635, + "learning_rate": 2.45931047974147e-06, + "loss": 0.2574145197868347, + "step": 3172 + }, + { + "epoch": 1.5678981836154702, + "grad_norm": 1.5403400973166155, + "learning_rate": 2.4539465912066706e-06, + "loss": 0.2586211562156677, + "step": 3173 + }, + { + "epoch": 1.5683924379093044, + "grad_norm": 1.340393455505496, + "learning_rate": 2.4485877403093095e-06, + "loss": 0.26383671164512634, + "step": 3174 + }, + { + "epoch": 1.5688866922031384, + "grad_norm": 1.2806590186816509, + "learning_rate": 2.4432339306268736e-06, + "loss": 0.28196123242378235, + "step": 3175 + }, + { + "epoch": 1.5693809464969726, + "grad_norm": 1.4692337066995136, + "learning_rate": 2.4378851657334923e-06, + "loss": 0.2736835181713104, + "step": 3176 + }, + { + "epoch": 1.5698752007908068, + "grad_norm": 1.3442483287569258, + "learning_rate": 2.4325414491999255e-06, + "loss": 0.2316201627254486, + "step": 3177 + }, + { + "epoch": 1.570369455084641, + "grad_norm": 1.363437265904272, + "learning_rate": 2.427202784593562e-06, + "loss": 0.23955810070037842, + "step": 3178 + }, + { + "epoch": 1.5708637093784752, + "grad_norm": 1.4240865879172782, + "learning_rate": 2.4218691754784162e-06, + "loss": 0.263042151927948, + "step": 3179 + }, + { + "epoch": 1.5713579636723094, + "grad_norm": 
1.3283544396978941, + "learning_rate": 2.4165406254151312e-06, + "loss": 0.25570976734161377, + "step": 3180 + }, + { + "epoch": 1.5718522179661436, + "grad_norm": 1.3508561425487733, + "learning_rate": 2.4112171379609696e-06, + "loss": 0.2503488063812256, + "step": 3181 + }, + { + "epoch": 1.5723464722599778, + "grad_norm": 1.2731349274514334, + "learning_rate": 2.40589871666982e-06, + "loss": 0.21815824508666992, + "step": 3182 + }, + { + "epoch": 1.572840726553812, + "grad_norm": 1.4354076907799536, + "learning_rate": 2.400585365092177e-06, + "loss": 0.23936739563941956, + "step": 3183 + }, + { + "epoch": 1.5733349808476462, + "grad_norm": 1.2459112031686363, + "learning_rate": 2.3952770867751595e-06, + "loss": 0.2618086636066437, + "step": 3184 + }, + { + "epoch": 1.5738292351414804, + "grad_norm": 1.477109441631464, + "learning_rate": 2.3899738852625065e-06, + "loss": 0.2852020263671875, + "step": 3185 + }, + { + "epoch": 1.5743234894353144, + "grad_norm": 1.4364121007652697, + "learning_rate": 2.3846757640945505e-06, + "loss": 0.28860047459602356, + "step": 3186 + }, + { + "epoch": 1.5748177437291486, + "grad_norm": 1.2738328733534112, + "learning_rate": 2.3793827268082446e-06, + "loss": 0.2397383451461792, + "step": 3187 + }, + { + "epoch": 1.5753119980229828, + "grad_norm": 1.3548543446694599, + "learning_rate": 2.374094776937145e-06, + "loss": 0.25204962491989136, + "step": 3188 + }, + { + "epoch": 1.575806252316817, + "grad_norm": 1.2908932541507008, + "learning_rate": 2.368811918011411e-06, + "loss": 0.21216189861297607, + "step": 3189 + }, + { + "epoch": 1.576300506610651, + "grad_norm": 1.4719289728075926, + "learning_rate": 2.363534153557805e-06, + "loss": 0.2647620737552643, + "step": 3190 + }, + { + "epoch": 1.5767947609044852, + "grad_norm": 1.4154428976481128, + "learning_rate": 2.358261487099688e-06, + "loss": 0.3079666793346405, + "step": 3191 + }, + { + "epoch": 1.5772890151983194, + "grad_norm": 1.1847060614906242, + "learning_rate": 
2.352993922157013e-06, + "loss": 0.22961711883544922, + "step": 3192 + }, + { + "epoch": 1.5777832694921536, + "grad_norm": 1.5460794294977342, + "learning_rate": 2.347731462246331e-06, + "loss": 0.2657305598258972, + "step": 3193 + }, + { + "epoch": 1.5782775237859878, + "grad_norm": 1.2622234684788671, + "learning_rate": 2.3424741108807914e-06, + "loss": 0.224237859249115, + "step": 3194 + }, + { + "epoch": 1.578771778079822, + "grad_norm": 1.4036688905605132, + "learning_rate": 2.337221871570121e-06, + "loss": 0.26459985971450806, + "step": 3195 + }, + { + "epoch": 1.5792660323736563, + "grad_norm": 1.4237290486306964, + "learning_rate": 2.331974747820641e-06, + "loss": 0.25391027331352234, + "step": 3196 + }, + { + "epoch": 1.5797602866674905, + "grad_norm": 1.3683418214908574, + "learning_rate": 2.326732743135256e-06, + "loss": 0.25822141766548157, + "step": 3197 + }, + { + "epoch": 1.5802545409613247, + "grad_norm": 1.3569651988075904, + "learning_rate": 2.3214958610134554e-06, + "loss": 0.25140073895454407, + "step": 3198 + }, + { + "epoch": 1.5807487952551589, + "grad_norm": 1.280802230226295, + "learning_rate": 2.3162641049513035e-06, + "loss": 0.2550397515296936, + "step": 3199 + }, + { + "epoch": 1.581243049548993, + "grad_norm": 1.3770416210337255, + "learning_rate": 2.3110374784414526e-06, + "loss": 0.2648996412754059, + "step": 3200 + }, + { + "epoch": 1.5817373038428273, + "grad_norm": 1.285627272529884, + "learning_rate": 2.3058159849731134e-06, + "loss": 0.235626682639122, + "step": 3201 + }, + { + "epoch": 1.5822315581366613, + "grad_norm": 1.354562155318599, + "learning_rate": 2.3005996280320873e-06, + "loss": 0.24930328130722046, + "step": 3202 + }, + { + "epoch": 1.5827258124304955, + "grad_norm": 1.299026803187305, + "learning_rate": 2.2953884111007428e-06, + "loss": 0.23712117969989777, + "step": 3203 + }, + { + "epoch": 1.5832200667243297, + "grad_norm": 1.4407443338733177, + "learning_rate": 2.290182337658007e-06, + "loss": 
0.2504096031188965, + "step": 3204 + }, + { + "epoch": 1.583714321018164, + "grad_norm": 1.345261370550347, + "learning_rate": 2.2849814111793823e-06, + "loss": 0.2218465358018875, + "step": 3205 + }, + { + "epoch": 1.5842085753119979, + "grad_norm": 1.3818182639369938, + "learning_rate": 2.279785635136933e-06, + "loss": 0.2653011977672577, + "step": 3206 + }, + { + "epoch": 1.584702829605832, + "grad_norm": 1.521658991035551, + "learning_rate": 2.2745950129992853e-06, + "loss": 0.27551597356796265, + "step": 3207 + }, + { + "epoch": 1.5851970838996663, + "grad_norm": 1.2816405701256748, + "learning_rate": 2.2694095482316247e-06, + "loss": 0.21494519710540771, + "step": 3208 + }, + { + "epoch": 1.5856913381935005, + "grad_norm": 1.2804333364342155, + "learning_rate": 2.2642292442956925e-06, + "loss": 0.2517405152320862, + "step": 3209 + }, + { + "epoch": 1.5861855924873347, + "grad_norm": 1.365131298274178, + "learning_rate": 2.259054104649786e-06, + "loss": 0.25777050852775574, + "step": 3210 + }, + { + "epoch": 1.586679846781169, + "grad_norm": 1.3722239172040558, + "learning_rate": 2.2538841327487582e-06, + "loss": 0.25914469361305237, + "step": 3211 + }, + { + "epoch": 1.5871741010750031, + "grad_norm": 1.3924091851436682, + "learning_rate": 2.2487193320440017e-06, + "loss": 0.23877818882465363, + "step": 3212 + }, + { + "epoch": 1.5876683553688373, + "grad_norm": 1.2757007530985867, + "learning_rate": 2.2435597059834635e-06, + "loss": 0.2226967066526413, + "step": 3213 + }, + { + "epoch": 1.5881626096626715, + "grad_norm": 1.400079876174728, + "learning_rate": 2.2384052580116465e-06, + "loss": 0.28768399357795715, + "step": 3214 + }, + { + "epoch": 1.5886568639565057, + "grad_norm": 1.3700126786923876, + "learning_rate": 2.233255991569575e-06, + "loss": 0.2563883662223816, + "step": 3215 + }, + { + "epoch": 1.58915111825034, + "grad_norm": 1.3688176323163237, + "learning_rate": 2.2281119100948322e-06, + "loss": 0.2595394551753998, + "step": 3216 + }, + { + 
"epoch": 1.589645372544174, + "grad_norm": 1.2924408591101029, + "learning_rate": 2.2229730170215324e-06, + "loss": 0.2354460060596466, + "step": 3217 + }, + { + "epoch": 1.5901396268380081, + "grad_norm": 1.3015321221613778, + "learning_rate": 2.2178393157803225e-06, + "loss": 0.2397463619709015, + "step": 3218 + }, + { + "epoch": 1.5906338811318423, + "grad_norm": 1.5213512082778142, + "learning_rate": 2.212710809798393e-06, + "loss": 0.3304588794708252, + "step": 3219 + }, + { + "epoch": 1.5911281354256765, + "grad_norm": 1.2850321771259765, + "learning_rate": 2.207587502499464e-06, + "loss": 0.23891952633857727, + "step": 3220 + }, + { + "epoch": 1.5916223897195105, + "grad_norm": 1.3962733600240735, + "learning_rate": 2.2024693973037747e-06, + "loss": 0.2544774115085602, + "step": 3221 + }, + { + "epoch": 1.5921166440133447, + "grad_norm": 1.4193118785950918, + "learning_rate": 2.1973564976281003e-06, + "loss": 0.2620859444141388, + "step": 3222 + }, + { + "epoch": 1.592610898307179, + "grad_norm": 1.2794541555838774, + "learning_rate": 2.192248806885747e-06, + "loss": 0.22541281580924988, + "step": 3223 + }, + { + "epoch": 1.5931051526010132, + "grad_norm": 1.2886654382919192, + "learning_rate": 2.187146328486529e-06, + "loss": 0.23454351723194122, + "step": 3224 + }, + { + "epoch": 1.5935994068948474, + "grad_norm": 1.3236984572567387, + "learning_rate": 2.18204906583679e-06, + "loss": 0.24848732352256775, + "step": 3225 + }, + { + "epoch": 1.5940936611886816, + "grad_norm": 1.2049251354008288, + "learning_rate": 2.176957022339389e-06, + "loss": 0.21949590742588043, + "step": 3226 + }, + { + "epoch": 1.5945879154825158, + "grad_norm": 1.3436660246382202, + "learning_rate": 2.171870201393703e-06, + "loss": 0.27300944924354553, + "step": 3227 + }, + { + "epoch": 1.59508216977635, + "grad_norm": 1.3272049247129862, + "learning_rate": 2.16678860639562e-06, + "loss": 0.23850613832473755, + "step": 3228 + }, + { + "epoch": 1.5955764240701842, + "grad_norm": 
1.4611172116234663, + "learning_rate": 2.1617122407375424e-06, + "loss": 0.2728792428970337, + "step": 3229 + }, + { + "epoch": 1.5960706783640184, + "grad_norm": 1.4623582491499052, + "learning_rate": 2.1566411078083726e-06, + "loss": 0.2321755588054657, + "step": 3230 + }, + { + "epoch": 1.5965649326578526, + "grad_norm": 1.5111460470858884, + "learning_rate": 2.1515752109935374e-06, + "loss": 0.30118101835250854, + "step": 3231 + }, + { + "epoch": 1.5970591869516868, + "grad_norm": 1.2041348970592753, + "learning_rate": 2.1465145536749475e-06, + "loss": 0.22317390143871307, + "step": 3232 + }, + { + "epoch": 1.5975534412455208, + "grad_norm": 1.4530812438401597, + "learning_rate": 2.141459139231029e-06, + "loss": 0.2906285524368286, + "step": 3233 + }, + { + "epoch": 1.598047695539355, + "grad_norm": 1.3996891865587815, + "learning_rate": 2.136408971036704e-06, + "loss": 0.24645069241523743, + "step": 3234 + }, + { + "epoch": 1.5985419498331892, + "grad_norm": 1.4725365119055005, + "learning_rate": 2.1313640524633927e-06, + "loss": 0.26764121651649475, + "step": 3235 + }, + { + "epoch": 1.5990362041270234, + "grad_norm": 1.4093554565168636, + "learning_rate": 2.126324386879012e-06, + "loss": 0.2811397910118103, + "step": 3236 + }, + { + "epoch": 1.5995304584208574, + "grad_norm": 1.5104838755570678, + "learning_rate": 2.121289977647971e-06, + "loss": 0.254316508769989, + "step": 3237 + }, + { + "epoch": 1.6000247127146916, + "grad_norm": 1.378432417546232, + "learning_rate": 2.1162608281311636e-06, + "loss": 0.2479352205991745, + "step": 3238 + }, + { + "epoch": 1.6005189670085258, + "grad_norm": 1.200711868039053, + "learning_rate": 2.1112369416859847e-06, + "loss": 0.22767537832260132, + "step": 3239 + }, + { + "epoch": 1.60101322130236, + "grad_norm": 1.3194506024522585, + "learning_rate": 2.106218321666309e-06, + "loss": 0.24286411702632904, + "step": 3240 + }, + { + "epoch": 1.6015074755961942, + "grad_norm": 1.4143801874217299, + "learning_rate": 
2.1012049714224914e-06, + "loss": 0.22960595786571503, + "step": 3241 + }, + { + "epoch": 1.6020017298900284, + "grad_norm": 1.3710437918045983, + "learning_rate": 2.0961968943013742e-06, + "loss": 0.2448965162038803, + "step": 3242 + }, + { + "epoch": 1.6024959841838626, + "grad_norm": 1.3544107087641921, + "learning_rate": 2.0911940936462794e-06, + "loss": 0.23486846685409546, + "step": 3243 + }, + { + "epoch": 1.6029902384776968, + "grad_norm": 1.576868207611872, + "learning_rate": 2.0861965727970045e-06, + "loss": 0.2470572590827942, + "step": 3244 + }, + { + "epoch": 1.603484492771531, + "grad_norm": 1.3371604672673962, + "learning_rate": 2.0812043350898226e-06, + "loss": 0.283765971660614, + "step": 3245 + }, + { + "epoch": 1.6039787470653653, + "grad_norm": 1.3089763025714083, + "learning_rate": 2.076217383857484e-06, + "loss": 0.24943199753761292, + "step": 3246 + }, + { + "epoch": 1.6044730013591995, + "grad_norm": 1.2872721191375163, + "learning_rate": 2.0712357224291966e-06, + "loss": 0.22150146961212158, + "step": 3247 + }, + { + "epoch": 1.6049672556530334, + "grad_norm": 1.3204310548386595, + "learning_rate": 2.0662593541306563e-06, + "loss": 0.2610163390636444, + "step": 3248 + }, + { + "epoch": 1.6054615099468676, + "grad_norm": 1.243779369506435, + "learning_rate": 2.0612882822840154e-06, + "loss": 0.22789397835731506, + "step": 3249 + }, + { + "epoch": 1.6059557642407019, + "grad_norm": 1.3699765130937176, + "learning_rate": 2.056322510207882e-06, + "loss": 0.22956407070159912, + "step": 3250 + }, + { + "epoch": 1.606450018534536, + "grad_norm": 1.3752485526796745, + "learning_rate": 2.051362041217341e-06, + "loss": 0.2579299509525299, + "step": 3251 + }, + { + "epoch": 1.60694427282837, + "grad_norm": 1.449594870075983, + "learning_rate": 2.046406878623929e-06, + "loss": 0.24655218422412872, + "step": 3252 + }, + { + "epoch": 1.6074385271222043, + "grad_norm": 1.8413073723455704, + "learning_rate": 2.0414570257356415e-06, + "loss": 
0.2325882464647293, + "step": 3253 + }, + { + "epoch": 1.6079327814160385, + "grad_norm": 1.3704743037638702, + "learning_rate": 2.0365124858569294e-06, + "loss": 0.2678581476211548, + "step": 3254 + }, + { + "epoch": 1.6084270357098727, + "grad_norm": 1.3329052595945479, + "learning_rate": 2.0315732622886976e-06, + "loss": 0.23200136423110962, + "step": 3255 + }, + { + "epoch": 1.6089212900037069, + "grad_norm": 1.3902434854443921, + "learning_rate": 2.0266393583283015e-06, + "loss": 0.24957536160945892, + "step": 3256 + }, + { + "epoch": 1.609415544297541, + "grad_norm": 1.3655551679458238, + "learning_rate": 2.0217107772695467e-06, + "loss": 0.2506657540798187, + "step": 3257 + }, + { + "epoch": 1.6099097985913753, + "grad_norm": 1.3749666602598227, + "learning_rate": 2.0167875224026788e-06, + "loss": 0.22255182266235352, + "step": 3258 + }, + { + "epoch": 1.6104040528852095, + "grad_norm": 1.222166534445823, + "learning_rate": 2.011869597014392e-06, + "loss": 0.2489611655473709, + "step": 3259 + }, + { + "epoch": 1.6108983071790437, + "grad_norm": 1.3283456479938487, + "learning_rate": 2.0069570043878305e-06, + "loss": 0.24808533489704132, + "step": 3260 + }, + { + "epoch": 1.611392561472878, + "grad_norm": 1.3963788000978605, + "learning_rate": 2.0020497478025635e-06, + "loss": 0.24013441801071167, + "step": 3261 + }, + { + "epoch": 1.6118868157667121, + "grad_norm": 1.4500389435817727, + "learning_rate": 1.997147830534608e-06, + "loss": 0.2813841998577118, + "step": 3262 + }, + { + "epoch": 1.612381070060546, + "grad_norm": 1.4817798618081903, + "learning_rate": 1.9922512558564154e-06, + "loss": 0.23727375268936157, + "step": 3263 + }, + { + "epoch": 1.6128753243543803, + "grad_norm": 1.3341145171932982, + "learning_rate": 1.9873600270368664e-06, + "loss": 0.2341655194759369, + "step": 3264 + }, + { + "epoch": 1.6133695786482145, + "grad_norm": 1.4911949653625025, + "learning_rate": 1.9824741473412768e-06, + "loss": 0.32069963216781616, + "step": 3265 + }, + 
{ + "epoch": 1.6138638329420487, + "grad_norm": 1.4329702924332965, + "learning_rate": 1.977593620031393e-06, + "loss": 0.2414681762456894, + "step": 3266 + }, + { + "epoch": 1.6143580872358827, + "grad_norm": 1.4261430753271709, + "learning_rate": 1.9727184483653793e-06, + "loss": 0.25517842173576355, + "step": 3267 + }, + { + "epoch": 1.614852341529717, + "grad_norm": 1.4664579571771421, + "learning_rate": 1.967848635597831e-06, + "loss": 0.28264889121055603, + "step": 3268 + }, + { + "epoch": 1.6153465958235511, + "grad_norm": 1.2722226456356633, + "learning_rate": 1.962984184979774e-06, + "loss": 0.24543075263500214, + "step": 3269 + }, + { + "epoch": 1.6158408501173853, + "grad_norm": 1.3971489540759634, + "learning_rate": 1.9581250997586366e-06, + "loss": 0.2770763039588928, + "step": 3270 + }, + { + "epoch": 1.6163351044112195, + "grad_norm": 1.362601366326608, + "learning_rate": 1.953271383178278e-06, + "loss": 0.2521423101425171, + "step": 3271 + }, + { + "epoch": 1.6168293587050537, + "grad_norm": 1.262726405313237, + "learning_rate": 1.9484230384789702e-06, + "loss": 0.2402455359697342, + "step": 3272 + }, + { + "epoch": 1.617323612998888, + "grad_norm": 1.4929089203163604, + "learning_rate": 1.9435800688974005e-06, + "loss": 0.2947021424770355, + "step": 3273 + }, + { + "epoch": 1.6178178672927221, + "grad_norm": 1.2382587228414774, + "learning_rate": 1.938742477666663e-06, + "loss": 0.22238701581954956, + "step": 3274 + }, + { + "epoch": 1.6183121215865564, + "grad_norm": 1.2835510888376274, + "learning_rate": 1.933910268016269e-06, + "loss": 0.25475019216537476, + "step": 3275 + }, + { + "epoch": 1.6188063758803906, + "grad_norm": 1.2545564646453307, + "learning_rate": 1.929083443172125e-06, + "loss": 0.2316315472126007, + "step": 3276 + }, + { + "epoch": 1.6193006301742248, + "grad_norm": 1.2739392933893041, + "learning_rate": 1.9242620063565598e-06, + "loss": 0.24977952241897583, + "step": 3277 + }, + { + "epoch": 1.619794884468059, + "grad_norm": 
1.3712132397422443, + "learning_rate": 1.9194459607882887e-06, + "loss": 0.24006152153015137, + "step": 3278 + }, + { + "epoch": 1.620289138761893, + "grad_norm": 1.2866259343493134, + "learning_rate": 1.9146353096824366e-06, + "loss": 0.26050522923469543, + "step": 3279 + }, + { + "epoch": 1.6207833930557272, + "grad_norm": 1.338449999730035, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2698773443698883, + "step": 3280 + }, + { + "epoch": 1.6212776473495614, + "grad_norm": 1.480404977138073, + "learning_rate": 1.9050302037004765e-06, + "loss": 0.2627784013748169, + "step": 3281 + }, + { + "epoch": 1.6217719016433956, + "grad_norm": 1.3335627547093958, + "learning_rate": 1.900235755236599e-06, + "loss": 0.24261148273944855, + "step": 3282 + }, + { + "epoch": 1.6222661559372296, + "grad_norm": 1.347149973540751, + "learning_rate": 1.8954467140596023e-06, + "loss": 0.24689635634422302, + "step": 3283 + }, + { + "epoch": 1.6227604102310638, + "grad_norm": 1.4586477344669697, + "learning_rate": 1.890663083366574e-06, + "loss": 0.2885867953300476, + "step": 3284 + }, + { + "epoch": 1.623254664524898, + "grad_norm": 1.2981242679817548, + "learning_rate": 1.8858848663510066e-06, + "loss": 0.2624407112598419, + "step": 3285 + }, + { + "epoch": 1.6237489188187322, + "grad_norm": 1.4544775837390882, + "learning_rate": 1.881112066202767e-06, + "loss": 0.27705928683280945, + "step": 3286 + }, + { + "epoch": 1.6242431731125664, + "grad_norm": 1.4465119903360202, + "learning_rate": 1.8763446861081058e-06, + "loss": 0.26406094431877136, + "step": 3287 + }, + { + "epoch": 1.6247374274064006, + "grad_norm": 1.3239739188563808, + "learning_rate": 1.8715827292496557e-06, + "loss": 0.26495790481567383, + "step": 3288 + }, + { + "epoch": 1.6252316817002348, + "grad_norm": 1.419298583557058, + "learning_rate": 1.8668261988064406e-06, + "loss": 0.24995195865631104, + "step": 3289 + }, + { + "epoch": 1.625725935994069, + "grad_norm": 1.4058286500391235, + "learning_rate": 
1.8620750979538437e-06, + "loss": 0.23043034970760345, + "step": 3290 + }, + { + "epoch": 1.6262201902879032, + "grad_norm": 1.3959905154788135, + "learning_rate": 1.8573294298636334e-06, + "loss": 0.2590731978416443, + "step": 3291 + }, + { + "epoch": 1.6267144445817374, + "grad_norm": 1.3919450960931963, + "learning_rate": 1.8525891977039557e-06, + "loss": 0.24246811866760254, + "step": 3292 + }, + { + "epoch": 1.6272086988755716, + "grad_norm": 1.2790623939923147, + "learning_rate": 1.847854404639311e-06, + "loss": 0.2386825680732727, + "step": 3293 + }, + { + "epoch": 1.6277029531694056, + "grad_norm": 1.3168324939527787, + "learning_rate": 1.843125053830588e-06, + "loss": 0.2243885099887848, + "step": 3294 + }, + { + "epoch": 1.6281972074632398, + "grad_norm": 1.264397606173487, + "learning_rate": 1.838401148435035e-06, + "loss": 0.24984796345233917, + "step": 3295 + }, + { + "epoch": 1.628691461757074, + "grad_norm": 1.3015406971863621, + "learning_rate": 1.8336826916062568e-06, + "loss": 0.22784638404846191, + "step": 3296 + }, + { + "epoch": 1.6291857160509082, + "grad_norm": 1.4178841831435534, + "learning_rate": 1.828969686494232e-06, + "loss": 0.24812597036361694, + "step": 3297 + }, + { + "epoch": 1.6296799703447422, + "grad_norm": 1.2132930880582795, + "learning_rate": 1.8242621362452939e-06, + "loss": 0.234031543135643, + "step": 3298 + }, + { + "epoch": 1.6301742246385764, + "grad_norm": 1.251471335677166, + "learning_rate": 1.8195600440021377e-06, + "loss": 0.22455371916294098, + "step": 3299 + }, + { + "epoch": 1.6306684789324106, + "grad_norm": 1.2488121980955387, + "learning_rate": 1.8148634129038113e-06, + "loss": 0.22605910897254944, + "step": 3300 + }, + { + "epoch": 1.6311627332262448, + "grad_norm": 1.3484568896035969, + "learning_rate": 1.8101722460857184e-06, + "loss": 0.2527684271335602, + "step": 3301 + }, + { + "epoch": 1.631656987520079, + "grad_norm": 1.355365003110194, + "learning_rate": 1.8054865466796167e-06, + "loss": 
0.24625766277313232, + "step": 3302 + }, + { + "epoch": 1.6321512418139132, + "grad_norm": 1.7061568076136007, + "learning_rate": 1.8008063178136125e-06, + "loss": 0.31236231327056885, + "step": 3303 + }, + { + "epoch": 1.6326454961077475, + "grad_norm": 1.3433390649211776, + "learning_rate": 1.7961315626121566e-06, + "loss": 0.21256005764007568, + "step": 3304 + }, + { + "epoch": 1.6331397504015817, + "grad_norm": 1.5039173087965194, + "learning_rate": 1.7914622841960482e-06, + "loss": 0.25238949060440063, + "step": 3305 + }, + { + "epoch": 1.6336340046954159, + "grad_norm": 1.3709723014330413, + "learning_rate": 1.7867984856824382e-06, + "loss": 0.29630619287490845, + "step": 3306 + }, + { + "epoch": 1.63412825898925, + "grad_norm": 1.3891654533842075, + "learning_rate": 1.782140170184804e-06, + "loss": 0.26159363985061646, + "step": 3307 + }, + { + "epoch": 1.6346225132830843, + "grad_norm": 1.2884457367333761, + "learning_rate": 1.7774873408129733e-06, + "loss": 0.22361448407173157, + "step": 3308 + }, + { + "epoch": 1.6351167675769185, + "grad_norm": 1.410142665529872, + "learning_rate": 1.7728400006731083e-06, + "loss": 0.23890942335128784, + "step": 3309 + }, + { + "epoch": 1.6356110218707525, + "grad_norm": 1.3147986477314286, + "learning_rate": 1.7681981528677073e-06, + "loss": 0.23067504167556763, + "step": 3310 + }, + { + "epoch": 1.6361052761645867, + "grad_norm": 1.4202307068972662, + "learning_rate": 1.7635618004956012e-06, + "loss": 0.24790561199188232, + "step": 3311 + }, + { + "epoch": 1.6365995304584209, + "grad_norm": 1.3183461895569366, + "learning_rate": 1.7589309466519556e-06, + "loss": 0.2590476870536804, + "step": 3312 + }, + { + "epoch": 1.637093784752255, + "grad_norm": 1.4033807602679105, + "learning_rate": 1.754305594428254e-06, + "loss": 0.26833316683769226, + "step": 3313 + }, + { + "epoch": 1.637588039046089, + "grad_norm": 1.2949590395956057, + "learning_rate": 1.749685746912323e-06, + "loss": 0.23390671610832214, + "step": 3314 + }, 
+ { + "epoch": 1.6380822933399233, + "grad_norm": 1.350070481785481, + "learning_rate": 1.7450714071883079e-06, + "loss": 0.2760172188282013, + "step": 3315 + }, + { + "epoch": 1.6385765476337575, + "grad_norm": 1.387338184553767, + "learning_rate": 1.7404625783366703e-06, + "loss": 0.255672812461853, + "step": 3316 + }, + { + "epoch": 1.6390708019275917, + "grad_norm": 1.4073549622144716, + "learning_rate": 1.7358592634342008e-06, + "loss": 0.26336947083473206, + "step": 3317 + }, + { + "epoch": 1.639565056221426, + "grad_norm": 1.2609217918610456, + "learning_rate": 1.7312614655540071e-06, + "loss": 0.2308199107646942, + "step": 3318 + }, + { + "epoch": 1.64005931051526, + "grad_norm": 1.335188741822115, + "learning_rate": 1.7266691877655129e-06, + "loss": 0.24762676656246185, + "step": 3319 + }, + { + "epoch": 1.6405535648090943, + "grad_norm": 1.3287358421539026, + "learning_rate": 1.7220824331344577e-06, + "loss": 0.2175157219171524, + "step": 3320 + }, + { + "epoch": 1.6410478191029285, + "grad_norm": 1.3635707435478155, + "learning_rate": 1.7175012047228956e-06, + "loss": 0.24319039285182953, + "step": 3321 + }, + { + "epoch": 1.6415420733967627, + "grad_norm": 1.2272267263054326, + "learning_rate": 1.7129255055891813e-06, + "loss": 0.21708521246910095, + "step": 3322 + }, + { + "epoch": 1.642036327690597, + "grad_norm": 1.4404881849035673, + "learning_rate": 1.7083553387879969e-06, + "loss": 0.28576910495758057, + "step": 3323 + }, + { + "epoch": 1.6425305819844311, + "grad_norm": 1.3120467826579518, + "learning_rate": 1.703790707370313e-06, + "loss": 0.2664312720298767, + "step": 3324 + }, + { + "epoch": 1.6430248362782651, + "grad_norm": 1.5950926505285568, + "learning_rate": 1.6992316143834142e-06, + "loss": 0.23930951952934265, + "step": 3325 + }, + { + "epoch": 1.6435190905720993, + "grad_norm": 1.3985303284465023, + "learning_rate": 1.694678062870886e-06, + "loss": 0.2741955518722534, + "step": 3326 + }, + { + "epoch": 1.6440133448659335, + 
"grad_norm": 1.2830935776841221, + "learning_rate": 1.6901300558726142e-06, + "loss": 0.25177690386772156, + "step": 3327 + }, + { + "epoch": 1.6445075991597677, + "grad_norm": 1.4111945712412088, + "learning_rate": 1.6855875964247837e-06, + "loss": 0.26517611742019653, + "step": 3328 + }, + { + "epoch": 1.6450018534536017, + "grad_norm": 1.227994601145186, + "learning_rate": 1.6810506875598776e-06, + "loss": 0.2294573187828064, + "step": 3329 + }, + { + "epoch": 1.645496107747436, + "grad_norm": 1.3101987526620804, + "learning_rate": 1.6765193323066653e-06, + "loss": 0.23062998056411743, + "step": 3330 + }, + { + "epoch": 1.6459903620412701, + "grad_norm": 1.4687005380243534, + "learning_rate": 1.6719935336902205e-06, + "loss": 0.3047422468662262, + "step": 3331 + }, + { + "epoch": 1.6464846163351043, + "grad_norm": 1.4214345840675306, + "learning_rate": 1.6674732947319017e-06, + "loss": 0.2715694308280945, + "step": 3332 + }, + { + "epoch": 1.6469788706289386, + "grad_norm": 1.3486732362780178, + "learning_rate": 1.6629586184493519e-06, + "loss": 0.20359721779823303, + "step": 3333 + }, + { + "epoch": 1.6474731249227728, + "grad_norm": 1.256842666883273, + "learning_rate": 1.6584495078565045e-06, + "loss": 0.20083262026309967, + "step": 3334 + }, + { + "epoch": 1.647967379216607, + "grad_norm": 1.2824441486710174, + "learning_rate": 1.6539459659635848e-06, + "loss": 0.2274707555770874, + "step": 3335 + }, + { + "epoch": 1.6484616335104412, + "grad_norm": 1.4170790489583633, + "learning_rate": 1.6494479957770847e-06, + "loss": 0.2654137909412384, + "step": 3336 + }, + { + "epoch": 1.6489558878042754, + "grad_norm": 1.2207871831065553, + "learning_rate": 1.644955600299788e-06, + "loss": 0.24672716856002808, + "step": 3337 + }, + { + "epoch": 1.6494501420981096, + "grad_norm": 2.7206661248050494, + "learning_rate": 1.640468782530753e-06, + "loss": 0.21563802659511566, + "step": 3338 + }, + { + "epoch": 1.6499443963919438, + "grad_norm": 1.2772497258385302, + 
"learning_rate": 1.6359875454653151e-06, + "loss": 0.22986169159412384, + "step": 3339 + }, + { + "epoch": 1.650438650685778, + "grad_norm": 1.1914212857874291, + "learning_rate": 1.6315118920950857e-06, + "loss": 0.22981731593608856, + "step": 3340 + }, + { + "epoch": 1.650932904979612, + "grad_norm": 1.423180347857553, + "learning_rate": 1.6270418254079478e-06, + "loss": 0.25922536849975586, + "step": 3341 + }, + { + "epoch": 1.6514271592734462, + "grad_norm": 1.3808711162643625, + "learning_rate": 1.6225773483880503e-06, + "loss": 0.23273468017578125, + "step": 3342 + }, + { + "epoch": 1.6519214135672804, + "grad_norm": 1.3019728240659525, + "learning_rate": 1.6181184640158165e-06, + "loss": 0.22988896071910858, + "step": 3343 + }, + { + "epoch": 1.6524156678611144, + "grad_norm": 1.3674976753844925, + "learning_rate": 1.6136651752679333e-06, + "loss": 0.2628646790981293, + "step": 3344 + }, + { + "epoch": 1.6529099221549486, + "grad_norm": 1.3498513177046836, + "learning_rate": 1.6092174851173526e-06, + "loss": 0.24670086801052094, + "step": 3345 + }, + { + "epoch": 1.6534041764487828, + "grad_norm": 1.3175919767027275, + "learning_rate": 1.6047753965332902e-06, + "loss": 0.27845436334609985, + "step": 3346 + }, + { + "epoch": 1.653898430742617, + "grad_norm": 1.30200656487082, + "learning_rate": 1.6003389124812185e-06, + "loss": 0.25297483801841736, + "step": 3347 + }, + { + "epoch": 1.6543926850364512, + "grad_norm": 1.237195636484559, + "learning_rate": 1.595908035922873e-06, + "loss": 0.18876859545707703, + "step": 3348 + }, + { + "epoch": 1.6548869393302854, + "grad_norm": 1.3417621492525376, + "learning_rate": 1.591482769816246e-06, + "loss": 0.23852673172950745, + "step": 3349 + }, + { + "epoch": 1.6553811936241196, + "grad_norm": 1.3350614987774176, + "learning_rate": 1.587063117115576e-06, + "loss": 0.2569701373577118, + "step": 3350 + }, + { + "epoch": 1.6558754479179538, + "grad_norm": 1.354350083762125, + "learning_rate": 1.582649080771359e-06, + 
"loss": 0.29305699467658997, + "step": 3351 + }, + { + "epoch": 1.656369702211788, + "grad_norm": 1.42534989112271, + "learning_rate": 1.5782406637303527e-06, + "loss": 0.28942832350730896, + "step": 3352 + }, + { + "epoch": 1.6568639565056222, + "grad_norm": 1.351062882636418, + "learning_rate": 1.5738378689355439e-06, + "loss": 0.27491068840026855, + "step": 3353 + }, + { + "epoch": 1.6573582107994564, + "grad_norm": 1.4736732865815314, + "learning_rate": 1.569440699326179e-06, + "loss": 0.26730844378471375, + "step": 3354 + }, + { + "epoch": 1.6578524650932907, + "grad_norm": 1.3194299490413177, + "learning_rate": 1.5650491578377458e-06, + "loss": 0.23610982298851013, + "step": 3355 + }, + { + "epoch": 1.6583467193871246, + "grad_norm": 1.5894671595119023, + "learning_rate": 1.5606632474019734e-06, + "loss": 0.26817262172698975, + "step": 3356 + }, + { + "epoch": 1.6588409736809588, + "grad_norm": 1.4847304906222882, + "learning_rate": 1.556282970946833e-06, + "loss": 0.2403341382741928, + "step": 3357 + }, + { + "epoch": 1.659335227974793, + "grad_norm": 1.4109665373138245, + "learning_rate": 1.5519083313965378e-06, + "loss": 0.24433058500289917, + "step": 3358 + }, + { + "epoch": 1.6598294822686273, + "grad_norm": 1.2685951523616033, + "learning_rate": 1.5475393316715282e-06, + "loss": 0.2526702582836151, + "step": 3359 + }, + { + "epoch": 1.6603237365624612, + "grad_norm": 1.3373930264060108, + "learning_rate": 1.543175974688491e-06, + "loss": 0.24032334983348846, + "step": 3360 + }, + { + "epoch": 1.6608179908562954, + "grad_norm": 1.3759465001084996, + "learning_rate": 1.5388182633603433e-06, + "loss": 0.27770349383354187, + "step": 3361 + }, + { + "epoch": 1.6613122451501297, + "grad_norm": 1.5590715119269358, + "learning_rate": 1.534466200596224e-06, + "loss": 0.26002752780914307, + "step": 3362 + }, + { + "epoch": 1.6618064994439639, + "grad_norm": 1.867324678142589, + "learning_rate": 1.5301197893015129e-06, + "loss": 0.2707037329673767, + "step": 3363 
+ }, + { + "epoch": 1.662300753737798, + "grad_norm": 1.3300911116600942, + "learning_rate": 1.52577903237781e-06, + "loss": 0.27249252796173096, + "step": 3364 + }, + { + "epoch": 1.6627950080316323, + "grad_norm": 1.341030721831506, + "learning_rate": 1.5214439327229425e-06, + "loss": 0.22495020925998688, + "step": 3365 + }, + { + "epoch": 1.6632892623254665, + "grad_norm": 1.4580410293752506, + "learning_rate": 1.5171144932309622e-06, + "loss": 0.23561973869800568, + "step": 3366 + }, + { + "epoch": 1.6637835166193007, + "grad_norm": 1.4580927261417298, + "learning_rate": 1.512790716792143e-06, + "loss": 0.2689869701862335, + "step": 3367 + }, + { + "epoch": 1.664277770913135, + "grad_norm": 1.2734577307213573, + "learning_rate": 1.5084726062929688e-06, + "loss": 0.22249455749988556, + "step": 3368 + }, + { + "epoch": 1.664772025206969, + "grad_norm": 1.444110335390912, + "learning_rate": 1.5041601646161585e-06, + "loss": 0.24586130678653717, + "step": 3369 + }, + { + "epoch": 1.6652662795008033, + "grad_norm": 1.3250583547488792, + "learning_rate": 1.499853394640629e-06, + "loss": 0.2549409568309784, + "step": 3370 + }, + { + "epoch": 1.6657605337946373, + "grad_norm": 1.4135792596464256, + "learning_rate": 1.4955522992415206e-06, + "loss": 0.2517774999141693, + "step": 3371 + }, + { + "epoch": 1.6662547880884715, + "grad_norm": 1.6132674993246225, + "learning_rate": 1.491256881290184e-06, + "loss": 0.2627662420272827, + "step": 3372 + }, + { + "epoch": 1.6667490423823057, + "grad_norm": 1.250156659660365, + "learning_rate": 1.4869671436541788e-06, + "loss": 0.25203272700309753, + "step": 3373 + }, + { + "epoch": 1.66724329667614, + "grad_norm": 1.3035778741812132, + "learning_rate": 1.482683089197271e-06, + "loss": 0.2206164300441742, + "step": 3374 + }, + { + "epoch": 1.667737550969974, + "grad_norm": 1.4034071560123977, + "learning_rate": 1.4784047207794383e-06, + "loss": 0.2551203966140747, + "step": 3375 + }, + { + "epoch": 1.668231805263808, + 
"grad_norm": 1.4247468939554981, + "learning_rate": 1.4741320412568505e-06, + "loss": 0.2592264711856842, + "step": 3376 + }, + { + "epoch": 1.6687260595576423, + "grad_norm": 1.3609833066581156, + "learning_rate": 1.4698650534818936e-06, + "loss": 0.25902658700942993, + "step": 3377 + }, + { + "epoch": 1.6692203138514765, + "grad_norm": 1.5283083080675575, + "learning_rate": 1.4656037603031491e-06, + "loss": 0.2685459852218628, + "step": 3378 + }, + { + "epoch": 1.6697145681453107, + "grad_norm": 1.2083368696295387, + "learning_rate": 1.4613481645653914e-06, + "loss": 0.21010839939117432, + "step": 3379 + }, + { + "epoch": 1.670208822439145, + "grad_norm": 1.3019618254178054, + "learning_rate": 1.4570982691095925e-06, + "loss": 0.23318082094192505, + "step": 3380 + }, + { + "epoch": 1.6707030767329791, + "grad_norm": 1.346937478273973, + "learning_rate": 1.4528540767729315e-06, + "loss": 0.25045326352119446, + "step": 3381 + }, + { + "epoch": 1.6711973310268133, + "grad_norm": 1.5157571774504706, + "learning_rate": 1.4486155903887623e-06, + "loss": 0.2436288446187973, + "step": 3382 + }, + { + "epoch": 1.6716915853206475, + "grad_norm": 1.2766580343897052, + "learning_rate": 1.444382812786641e-06, + "loss": 0.20454761385917664, + "step": 3383 + }, + { + "epoch": 1.6721858396144818, + "grad_norm": 1.3207693230256567, + "learning_rate": 1.4401557467923089e-06, + "loss": 0.24906963109970093, + "step": 3384 + }, + { + "epoch": 1.672680093908316, + "grad_norm": 1.3391460516330347, + "learning_rate": 1.435934395227695e-06, + "loss": 0.2552015483379364, + "step": 3385 + }, + { + "epoch": 1.6731743482021502, + "grad_norm": 1.3523733680416914, + "learning_rate": 1.4317187609109129e-06, + "loss": 0.2393915057182312, + "step": 3386 + }, + { + "epoch": 1.6736686024959841, + "grad_norm": 1.370539563215592, + "learning_rate": 1.4275088466562625e-06, + "loss": 0.2607477009296417, + "step": 3387 + }, + { + "epoch": 1.6741628567898184, + "grad_norm": 1.3296614147148798, + 
"learning_rate": 1.423304655274218e-06, + "loss": 0.23722632229328156, + "step": 3388 + }, + { + "epoch": 1.6746571110836526, + "grad_norm": 1.303256653854929, + "learning_rate": 1.4191061895714398e-06, + "loss": 0.2614964246749878, + "step": 3389 + }, + { + "epoch": 1.6751513653774868, + "grad_norm": 1.476448410559568, + "learning_rate": 1.4149134523507634e-06, + "loss": 0.2727823555469513, + "step": 3390 + }, + { + "epoch": 1.6756456196713208, + "grad_norm": 1.2739771939884463, + "learning_rate": 1.4107264464112003e-06, + "loss": 0.25176581740379333, + "step": 3391 + }, + { + "epoch": 1.676139873965155, + "grad_norm": 1.3087240197668597, + "learning_rate": 1.4065451745479352e-06, + "loss": 0.21339070796966553, + "step": 3392 + }, + { + "epoch": 1.6766341282589892, + "grad_norm": 1.449069234603101, + "learning_rate": 1.4023696395523267e-06, + "loss": 0.26540419459342957, + "step": 3393 + }, + { + "epoch": 1.6771283825528234, + "grad_norm": 1.3788929945945605, + "learning_rate": 1.3981998442119017e-06, + "loss": 0.2621360421180725, + "step": 3394 + }, + { + "epoch": 1.6776226368466576, + "grad_norm": 1.3149158272362809, + "learning_rate": 1.3940357913103576e-06, + "loss": 0.2578747570514679, + "step": 3395 + }, + { + "epoch": 1.6781168911404918, + "grad_norm": 1.3223117210430684, + "learning_rate": 1.3898774836275531e-06, + "loss": 0.26105010509490967, + "step": 3396 + }, + { + "epoch": 1.678611145434326, + "grad_norm": 1.277709690267506, + "learning_rate": 1.3857249239395143e-06, + "loss": 0.2221919298171997, + "step": 3397 + }, + { + "epoch": 1.6791053997281602, + "grad_norm": 1.3742911888899896, + "learning_rate": 1.3815781150184382e-06, + "loss": 0.2498932033777237, + "step": 3398 + }, + { + "epoch": 1.6795996540219944, + "grad_norm": 1.3631278461436225, + "learning_rate": 1.377437059632668e-06, + "loss": 0.29306796193122864, + "step": 3399 + }, + { + "epoch": 1.6800939083158286, + "grad_norm": 1.41106483401144, + "learning_rate": 1.3733017605467158e-06, + 
"loss": 0.23804892599582672, + "step": 3400 + }, + { + "epoch": 1.6805881626096628, + "grad_norm": 1.264388446305106, + "learning_rate": 1.3691722205212465e-06, + "loss": 0.18528425693511963, + "step": 3401 + }, + { + "epoch": 1.6810824169034968, + "grad_norm": 1.434400904695952, + "learning_rate": 1.365048442313085e-06, + "loss": 0.257534921169281, + "step": 3402 + }, + { + "epoch": 1.681576671197331, + "grad_norm": 1.390183210111369, + "learning_rate": 1.3609304286752034e-06, + "loss": 0.2519993782043457, + "step": 3403 + }, + { + "epoch": 1.6820709254911652, + "grad_norm": 1.5041703905686798, + "learning_rate": 1.3568181823567328e-06, + "loss": 0.27830445766448975, + "step": 3404 + }, + { + "epoch": 1.6825651797849994, + "grad_norm": 1.3496130761993563, + "learning_rate": 1.3527117061029438e-06, + "loss": 0.22532883286476135, + "step": 3405 + }, + { + "epoch": 1.6830594340788334, + "grad_norm": 1.3484913124474047, + "learning_rate": 1.3486110026552668e-06, + "loss": 0.23230011761188507, + "step": 3406 + }, + { + "epoch": 1.6835536883726676, + "grad_norm": 1.320791018685261, + "learning_rate": 1.3445160747512743e-06, + "loss": 0.24105653166770935, + "step": 3407 + }, + { + "epoch": 1.6840479426665018, + "grad_norm": 1.5077644423875391, + "learning_rate": 1.340426925124676e-06, + "loss": 0.2946394681930542, + "step": 3408 + }, + { + "epoch": 1.684542196960336, + "grad_norm": 1.403422513607122, + "learning_rate": 1.3363435565053319e-06, + "loss": 0.2682989239692688, + "step": 3409 + }, + { + "epoch": 1.6850364512541702, + "grad_norm": 1.3363195283881322, + "learning_rate": 1.332265971619241e-06, + "loss": 0.2219456285238266, + "step": 3410 + }, + { + "epoch": 1.6855307055480044, + "grad_norm": 1.2440577869208935, + "learning_rate": 1.3281941731885396e-06, + "loss": 0.22532151639461517, + "step": 3411 + }, + { + "epoch": 1.6860249598418386, + "grad_norm": 1.3951142777226702, + "learning_rate": 1.324128163931504e-06, + "loss": 0.24166807532310486, + "step": 3412 + }, 
+ { + "epoch": 1.6865192141356729, + "grad_norm": 1.8803758040895027, + "learning_rate": 1.3200679465625453e-06, + "loss": 0.25514671206474304, + "step": 3413 + }, + { + "epoch": 1.687013468429507, + "grad_norm": 1.4161288294493581, + "learning_rate": 1.3160135237922011e-06, + "loss": 0.263123482465744, + "step": 3414 + }, + { + "epoch": 1.6875077227233413, + "grad_norm": 1.3692510048196695, + "learning_rate": 1.3119648983271527e-06, + "loss": 0.23763976991176605, + "step": 3415 + }, + { + "epoch": 1.6880019770171755, + "grad_norm": 1.4514594135261416, + "learning_rate": 1.3079220728701991e-06, + "loss": 0.28645598888397217, + "step": 3416 + }, + { + "epoch": 1.6884962313110097, + "grad_norm": 1.3145652794970974, + "learning_rate": 1.303885050120275e-06, + "loss": 0.2269624024629593, + "step": 3417 + }, + { + "epoch": 1.6889904856048437, + "grad_norm": 1.2380861054344243, + "learning_rate": 1.2998538327724386e-06, + "loss": 0.23601466417312622, + "step": 3418 + }, + { + "epoch": 1.6894847398986779, + "grad_norm": 1.4253359182592056, + "learning_rate": 1.2958284235178743e-06, + "loss": 0.2246169149875641, + "step": 3419 + }, + { + "epoch": 1.689978994192512, + "grad_norm": 1.497489718348998, + "learning_rate": 1.2918088250438865e-06, + "loss": 0.26519715785980225, + "step": 3420 + }, + { + "epoch": 1.6904732484863463, + "grad_norm": 1.443915314302877, + "learning_rate": 1.2877950400339046e-06, + "loss": 0.2590267062187195, + "step": 3421 + }, + { + "epoch": 1.6909675027801803, + "grad_norm": 1.3941822393799335, + "learning_rate": 1.2837870711674672e-06, + "loss": 0.2535945773124695, + "step": 3422 + }, + { + "epoch": 1.6914617570740145, + "grad_norm": 1.3833358145204437, + "learning_rate": 1.279784921120244e-06, + "loss": 0.21907874941825867, + "step": 3423 + }, + { + "epoch": 1.6919560113678487, + "grad_norm": 1.3775789573220893, + "learning_rate": 1.2757885925640124e-06, + "loss": 0.23314553499221802, + "step": 3424 + }, + { + "epoch": 1.6924502656616829, + 
"grad_norm": 1.2335650824399806, + "learning_rate": 1.2717980881666615e-06, + "loss": 0.2288433313369751, + "step": 3425 + }, + { + "epoch": 1.692944519955517, + "grad_norm": 1.3218922014839134, + "learning_rate": 1.2678134105921924e-06, + "loss": 0.2285449206829071, + "step": 3426 + }, + { + "epoch": 1.6934387742493513, + "grad_norm": 1.4061495134031399, + "learning_rate": 1.2638345625007287e-06, + "loss": 0.2898653447628021, + "step": 3427 + }, + { + "epoch": 1.6939330285431855, + "grad_norm": 1.3140964049835469, + "learning_rate": 1.2598615465484831e-06, + "loss": 0.23574519157409668, + "step": 3428 + }, + { + "epoch": 1.6944272828370197, + "grad_norm": 1.8163323929078987, + "learning_rate": 1.2558943653877887e-06, + "loss": 0.23385417461395264, + "step": 3429 + }, + { + "epoch": 1.694921537130854, + "grad_norm": 1.4332956021988026, + "learning_rate": 1.2519330216670766e-06, + "loss": 0.2555482089519501, + "step": 3430 + }, + { + "epoch": 1.6954157914246881, + "grad_norm": 1.3005186125236943, + "learning_rate": 1.247977518030885e-06, + "loss": 0.22221535444259644, + "step": 3431 + }, + { + "epoch": 1.6959100457185223, + "grad_norm": 1.2645213358789251, + "learning_rate": 1.2440278571198516e-06, + "loss": 0.21753090620040894, + "step": 3432 + }, + { + "epoch": 1.6964043000123563, + "grad_norm": 1.3199124302473737, + "learning_rate": 1.240084041570716e-06, + "loss": 0.2352944314479828, + "step": 3433 + }, + { + "epoch": 1.6968985543061905, + "grad_norm": 1.3019158889354874, + "learning_rate": 1.2361460740163045e-06, + "loss": 0.22581814229488373, + "step": 3434 + }, + { + "epoch": 1.6973928086000247, + "grad_norm": 1.5051457985045136, + "learning_rate": 1.2322139570855596e-06, + "loss": 0.28703421354293823, + "step": 3435 + }, + { + "epoch": 1.697887062893859, + "grad_norm": 1.2466294121854475, + "learning_rate": 1.2282876934034972e-06, + "loss": 0.21528789401054382, + "step": 3436 + }, + { + "epoch": 1.698381317187693, + "grad_norm": 1.3714652202926056, + 
"learning_rate": 1.2243672855912393e-06, + "loss": 0.2675422430038452, + "step": 3437 + }, + { + "epoch": 1.6988755714815271, + "grad_norm": 1.4468798550658835, + "learning_rate": 1.2204527362659913e-06, + "loss": 0.26681527495384216, + "step": 3438 + }, + { + "epoch": 1.6993698257753613, + "grad_norm": 1.6692863707132455, + "learning_rate": 1.216544048041054e-06, + "loss": 0.2436470091342926, + "step": 3439 + }, + { + "epoch": 1.6998640800691955, + "grad_norm": 1.3471564011899657, + "learning_rate": 1.212641223525809e-06, + "loss": 0.25458425283432007, + "step": 3440 + }, + { + "epoch": 1.7003583343630297, + "grad_norm": 1.5076141037655715, + "learning_rate": 1.2087442653257286e-06, + "loss": 0.24890559911727905, + "step": 3441 + }, + { + "epoch": 1.700852588656864, + "grad_norm": 1.2935321774740525, + "learning_rate": 1.2048531760423642e-06, + "loss": 0.26031816005706787, + "step": 3442 + }, + { + "epoch": 1.7013468429506982, + "grad_norm": 1.2852726465517723, + "learning_rate": 1.200967958273349e-06, + "loss": 0.22184975445270538, + "step": 3443 + }, + { + "epoch": 1.7018410972445324, + "grad_norm": 1.4055101079653758, + "learning_rate": 1.1970886146124073e-06, + "loss": 0.2670953571796417, + "step": 3444 + }, + { + "epoch": 1.7023353515383666, + "grad_norm": 1.4509425159233789, + "learning_rate": 1.1932151476493247e-06, + "loss": 0.27950525283813477, + "step": 3445 + }, + { + "epoch": 1.7028296058322008, + "grad_norm": 1.177838308027136, + "learning_rate": 1.1893475599699766e-06, + "loss": 0.23257380723953247, + "step": 3446 + }, + { + "epoch": 1.703323860126035, + "grad_norm": 1.33833163811184, + "learning_rate": 1.1854858541563086e-06, + "loss": 0.2586575746536255, + "step": 3447 + }, + { + "epoch": 1.703818114419869, + "grad_norm": 1.4079485154063143, + "learning_rate": 1.1816300327863406e-06, + "loss": 0.2677457928657532, + "step": 3448 + }, + { + "epoch": 1.7043123687137032, + "grad_norm": 1.565618455451115, + "learning_rate": 1.1777800984341637e-06, + 
"loss": 0.29866284132003784, + "step": 3449 + }, + { + "epoch": 1.7048066230075374, + "grad_norm": 1.3858480302164131, + "learning_rate": 1.1739360536699397e-06, + "loss": 0.27279675006866455, + "step": 3450 + }, + { + "epoch": 1.7053008773013716, + "grad_norm": 1.4265301971817403, + "learning_rate": 1.1700979010598945e-06, + "loss": 0.25695672631263733, + "step": 3451 + }, + { + "epoch": 1.7057951315952056, + "grad_norm": 1.2548676263466874, + "learning_rate": 1.1662656431663278e-06, + "loss": 0.22578787803649902, + "step": 3452 + }, + { + "epoch": 1.7062893858890398, + "grad_norm": 1.2884557931863843, + "learning_rate": 1.1624392825476016e-06, + "loss": 0.1946491301059723, + "step": 3453 + }, + { + "epoch": 1.706783640182874, + "grad_norm": 1.7214838792794764, + "learning_rate": 1.158618821758134e-06, + "loss": 0.2099667191505432, + "step": 3454 + }, + { + "epoch": 1.7072778944767082, + "grad_norm": 1.3956932051100446, + "learning_rate": 1.1548042633484148e-06, + "loss": 0.22660428285598755, + "step": 3455 + }, + { + "epoch": 1.7077721487705424, + "grad_norm": 1.486801447510752, + "learning_rate": 1.1509956098649855e-06, + "loss": 0.27378255128860474, + "step": 3456 + }, + { + "epoch": 1.7082664030643766, + "grad_norm": 1.3265929348116055, + "learning_rate": 1.1471928638504504e-06, + "loss": 0.2209164947271347, + "step": 3457 + }, + { + "epoch": 1.7087606573582108, + "grad_norm": 1.4225246621575494, + "learning_rate": 1.1433960278434687e-06, + "loss": 0.24310322105884552, + "step": 3458 + }, + { + "epoch": 1.709254911652045, + "grad_norm": 1.408175906725771, + "learning_rate": 1.1396051043787526e-06, + "loss": 0.23209068179130554, + "step": 3459 + }, + { + "epoch": 1.7097491659458792, + "grad_norm": 1.3815567972930465, + "learning_rate": 1.1358200959870703e-06, + "loss": 0.2514454126358032, + "step": 3460 + }, + { + "epoch": 1.7102434202397134, + "grad_norm": 1.4417631759146625, + "learning_rate": 1.132041005195239e-06, + "loss": 0.2580721378326416, + "step": 
3461 + }, + { + "epoch": 1.7107376745335476, + "grad_norm": 1.3709268368925525, + "learning_rate": 1.1282678345261234e-06, + "loss": 0.26388949155807495, + "step": 3462 + }, + { + "epoch": 1.7112319288273818, + "grad_norm": 1.2783952905855267, + "learning_rate": 1.1245005864986402e-06, + "loss": 0.2194654643535614, + "step": 3463 + }, + { + "epoch": 1.7117261831212158, + "grad_norm": 1.2633121407835717, + "learning_rate": 1.1207392636277502e-06, + "loss": 0.2048814296722412, + "step": 3464 + }, + { + "epoch": 1.71222043741505, + "grad_norm": 1.33926020269927, + "learning_rate": 1.1169838684244584e-06, + "loss": 0.24165832996368408, + "step": 3465 + }, + { + "epoch": 1.7127146917088842, + "grad_norm": 1.3906329052137327, + "learning_rate": 1.1132344033958132e-06, + "loss": 0.2484482377767563, + "step": 3466 + }, + { + "epoch": 1.7132089460027184, + "grad_norm": 1.4564028814853938, + "learning_rate": 1.1094908710449048e-06, + "loss": 0.2406741827726364, + "step": 3467 + }, + { + "epoch": 1.7137032002965524, + "grad_norm": 1.4018531611252434, + "learning_rate": 1.1057532738708588e-06, + "loss": 0.2417721152305603, + "step": 3468 + }, + { + "epoch": 1.7141974545903866, + "grad_norm": 1.4560734194910743, + "learning_rate": 1.1020216143688446e-06, + "loss": 0.26304543018341064, + "step": 3469 + }, + { + "epoch": 1.7146917088842208, + "grad_norm": 1.476031518585943, + "learning_rate": 1.098295895030066e-06, + "loss": 0.30013689398765564, + "step": 3470 + }, + { + "epoch": 1.715185963178055, + "grad_norm": 1.3175345714713855, + "learning_rate": 1.0945761183417569e-06, + "loss": 0.21451817452907562, + "step": 3471 + }, + { + "epoch": 1.7156802174718893, + "grad_norm": 1.3300365419760627, + "learning_rate": 1.0908622867871854e-06, + "loss": 0.235377699136734, + "step": 3472 + }, + { + "epoch": 1.7161744717657235, + "grad_norm": 1.2866674867130445, + "learning_rate": 1.0871544028456594e-06, + "loss": 0.23560425639152527, + "step": 3473 + }, + { + "epoch": 1.7166687260595577, 
+ "grad_norm": 1.3385949926310057, + "learning_rate": 1.083452468992503e-06, + "loss": 0.2431229054927826, + "step": 3474 + }, + { + "epoch": 1.7171629803533919, + "grad_norm": 1.2089508133597444, + "learning_rate": 1.0797564876990762e-06, + "loss": 0.211553692817688, + "step": 3475 + }, + { + "epoch": 1.717657234647226, + "grad_norm": 1.3533177183735723, + "learning_rate": 1.0760664614327643e-06, + "loss": 0.23565953969955444, + "step": 3476 + }, + { + "epoch": 1.7181514889410603, + "grad_norm": 1.328162178864468, + "learning_rate": 1.0723823926569744e-06, + "loss": 0.2052966058254242, + "step": 3477 + }, + { + "epoch": 1.7186457432348945, + "grad_norm": 1.3067945675468369, + "learning_rate": 1.06870428383114e-06, + "loss": 0.24831204116344452, + "step": 3478 + }, + { + "epoch": 1.7191399975287285, + "grad_norm": 1.273169118321956, + "learning_rate": 1.0650321374107142e-06, + "loss": 0.24706462025642395, + "step": 3479 + }, + { + "epoch": 1.7196342518225627, + "grad_norm": 1.4211234189057285, + "learning_rate": 1.0613659558471644e-06, + "loss": 0.20845818519592285, + "step": 3480 + }, + { + "epoch": 1.720128506116397, + "grad_norm": 1.2323642708024432, + "learning_rate": 1.0577057415879887e-06, + "loss": 0.21599797904491425, + "step": 3481 + }, + { + "epoch": 1.720622760410231, + "grad_norm": 1.4618240857831881, + "learning_rate": 1.054051497076689e-06, + "loss": 0.2381049394607544, + "step": 3482 + }, + { + "epoch": 1.721117014704065, + "grad_norm": 1.3155008449637104, + "learning_rate": 1.0504032247527874e-06, + "loss": 0.22402817010879517, + "step": 3483 + }, + { + "epoch": 1.7216112689978993, + "grad_norm": 1.5409902580545625, + "learning_rate": 1.0467609270518186e-06, + "loss": 0.24406251311302185, + "step": 3484 + }, + { + "epoch": 1.7221055232917335, + "grad_norm": 1.339222294791023, + "learning_rate": 1.0431246064053291e-06, + "loss": 0.24388936161994934, + "step": 3485 + }, + { + "epoch": 1.7225997775855677, + "grad_norm": 1.3265412686691833, + 
"learning_rate": 1.0394942652408735e-06, + "loss": 0.26131671667099, + "step": 3486 + }, + { + "epoch": 1.723094031879402, + "grad_norm": 1.3718768259485188, + "learning_rate": 1.0358699059820188e-06, + "loss": 0.247392475605011, + "step": 3487 + }, + { + "epoch": 1.7235882861732361, + "grad_norm": 1.335920284358623, + "learning_rate": 1.0322515310483316e-06, + "loss": 0.22713768482208252, + "step": 3488 + }, + { + "epoch": 1.7240825404670703, + "grad_norm": 1.3821197244420464, + "learning_rate": 1.0286391428553854e-06, + "loss": 0.2544357180595398, + "step": 3489 + }, + { + "epoch": 1.7245767947609045, + "grad_norm": 1.260460911336476, + "learning_rate": 1.0250327438147678e-06, + "loss": 0.23186656832695007, + "step": 3490 + }, + { + "epoch": 1.7250710490547387, + "grad_norm": 1.1804266448755296, + "learning_rate": 1.0214323363340506e-06, + "loss": 0.20387035608291626, + "step": 3491 + }, + { + "epoch": 1.725565303348573, + "grad_norm": 1.4265943405789598, + "learning_rate": 1.017837922816819e-06, + "loss": 0.25391846895217896, + "step": 3492 + }, + { + "epoch": 1.7260595576424071, + "grad_norm": 1.2603447890118837, + "learning_rate": 1.014249505662649e-06, + "loss": 0.23214812576770782, + "step": 3493 + }, + { + "epoch": 1.7265538119362414, + "grad_norm": 1.5899981641866812, + "learning_rate": 1.0106670872671187e-06, + "loss": 0.31888365745544434, + "step": 3494 + }, + { + "epoch": 1.7270480662300753, + "grad_norm": 1.2907611357867346, + "learning_rate": 1.0070906700217998e-06, + "loss": 0.23372362554073334, + "step": 3495 + }, + { + "epoch": 1.7275423205239095, + "grad_norm": 1.2449017093435057, + "learning_rate": 1.0035202563142577e-06, + "loss": 0.20082907378673553, + "step": 3496 + }, + { + "epoch": 1.7280365748177438, + "grad_norm": 1.3171397747083256, + "learning_rate": 9.99955848528046e-07, + "loss": 0.23895825445652008, + "step": 3497 + }, + { + "epoch": 1.728530829111578, + "grad_norm": 1.4142591511055072, + "learning_rate": 9.963974490427153e-07, + 
"loss": 0.30089694261550903, + "step": 3498 + }, + { + "epoch": 1.729025083405412, + "grad_norm": 1.4071492496267155, + "learning_rate": 9.928450602338046e-07, + "loss": 0.28134891390800476, + "step": 3499 + }, + { + "epoch": 1.7295193376992462, + "grad_norm": 1.239666390023503, + "learning_rate": 9.892986844728325e-07, + "loss": 0.1947125792503357, + "step": 3500 + }, + { + "epoch": 1.7300135919930804, + "grad_norm": 1.2560350647671819, + "learning_rate": 9.857583241273116e-07, + "loss": 0.252549409866333, + "step": 3501 + }, + { + "epoch": 1.7305078462869146, + "grad_norm": 1.8080125735095465, + "learning_rate": 9.82223981560736e-07, + "loss": 0.28061211109161377, + "step": 3502 + }, + { + "epoch": 1.7310021005807488, + "grad_norm": 1.3465400182463805, + "learning_rate": 9.786956591325813e-07, + "loss": 0.2492327094078064, + "step": 3503 + }, + { + "epoch": 1.731496354874583, + "grad_norm": 1.3114105920039891, + "learning_rate": 9.75173359198307e-07, + "loss": 0.20470373332500458, + "step": 3504 + }, + { + "epoch": 1.7319906091684172, + "grad_norm": 1.4582343704980485, + "learning_rate": 9.716570841093476e-07, + "loss": 0.24190351366996765, + "step": 3505 + }, + { + "epoch": 1.7324848634622514, + "grad_norm": 1.3916465638756335, + "learning_rate": 9.681468362131209e-07, + "loss": 0.28784725069999695, + "step": 3506 + }, + { + "epoch": 1.7329791177560856, + "grad_norm": 1.4872057430892556, + "learning_rate": 9.646426178530176e-07, + "loss": 0.2676560878753662, + "step": 3507 + }, + { + "epoch": 1.7334733720499198, + "grad_norm": 1.4118374661566944, + "learning_rate": 9.611444313684027e-07, + "loss": 0.2493928223848343, + "step": 3508 + }, + { + "epoch": 1.733967626343754, + "grad_norm": 1.272854491876895, + "learning_rate": 9.57652279094613e-07, + "loss": 0.23272472620010376, + "step": 3509 + }, + { + "epoch": 1.734461880637588, + "grad_norm": 1.3295460481124186, + "learning_rate": 9.541661633629662e-07, + "loss": 0.23245804011821747, + "step": 3510 + }, + { + 
"epoch": 1.7349561349314222, + "grad_norm": 1.318916212284511, + "learning_rate": 9.506860865007373e-07, + "loss": 0.22367024421691895, + "step": 3511 + }, + { + "epoch": 1.7354503892252564, + "grad_norm": 1.312738075120818, + "learning_rate": 9.472120508311788e-07, + "loss": 0.22332677245140076, + "step": 3512 + }, + { + "epoch": 1.7359446435190906, + "grad_norm": 1.3669711817276102, + "learning_rate": 9.437440586735081e-07, + "loss": 0.28051453828811646, + "step": 3513 + }, + { + "epoch": 1.7364388978129246, + "grad_norm": 1.5089189064457602, + "learning_rate": 9.402821123429017e-07, + "loss": 0.24815741181373596, + "step": 3514 + }, + { + "epoch": 1.7369331521067588, + "grad_norm": 1.338757796188803, + "learning_rate": 9.368262141505114e-07, + "loss": 0.24077603220939636, + "step": 3515 + }, + { + "epoch": 1.737427406400593, + "grad_norm": 1.419717776508751, + "learning_rate": 9.333763664034457e-07, + "loss": 0.24596062302589417, + "step": 3516 + }, + { + "epoch": 1.7379216606944272, + "grad_norm": 1.3347588363810814, + "learning_rate": 9.299325714047702e-07, + "loss": 0.22939634323120117, + "step": 3517 + }, + { + "epoch": 1.7384159149882614, + "grad_norm": 1.446015117761441, + "learning_rate": 9.264948314535116e-07, + "loss": 0.24870653450489044, + "step": 3518 + }, + { + "epoch": 1.7389101692820956, + "grad_norm": 1.2985600743859553, + "learning_rate": 9.23063148844664e-07, + "loss": 0.24589623510837555, + "step": 3519 + }, + { + "epoch": 1.7394044235759298, + "grad_norm": 1.3138002527909343, + "learning_rate": 9.196375258691615e-07, + "loss": 0.24228474497795105, + "step": 3520 + }, + { + "epoch": 1.739898677869764, + "grad_norm": 1.274631487561465, + "learning_rate": 9.162179648139047e-07, + "loss": 0.24371150135993958, + "step": 3521 + }, + { + "epoch": 1.7403929321635982, + "grad_norm": 1.279720023026326, + "learning_rate": 9.128044679617432e-07, + "loss": 0.24775750935077667, + "step": 3522 + }, + { + "epoch": 1.7408871864574325, + "grad_norm": 
1.5257492514284694, + "learning_rate": 9.093970375914784e-07, + "loss": 0.2893243432044983, + "step": 3523 + }, + { + "epoch": 1.7413814407512667, + "grad_norm": 1.300861064044251, + "learning_rate": 9.059956759778632e-07, + "loss": 0.24014830589294434, + "step": 3524 + }, + { + "epoch": 1.7418756950451009, + "grad_norm": 1.399249837900177, + "learning_rate": 9.026003853915977e-07, + "loss": 0.21439003944396973, + "step": 3525 + }, + { + "epoch": 1.7423699493389349, + "grad_norm": 1.3253623378225632, + "learning_rate": 8.992111680993265e-07, + "loss": 0.23376847803592682, + "step": 3526 + }, + { + "epoch": 1.742864203632769, + "grad_norm": 1.3914877634645069, + "learning_rate": 8.958280263636487e-07, + "loss": 0.244795560836792, + "step": 3527 + }, + { + "epoch": 1.7433584579266033, + "grad_norm": 1.3847661327530765, + "learning_rate": 8.924509624430955e-07, + "loss": 0.2513751685619354, + "step": 3528 + }, + { + "epoch": 1.7438527122204373, + "grad_norm": 1.3808839230401615, + "learning_rate": 8.890799785921478e-07, + "loss": 0.2118893414735794, + "step": 3529 + }, + { + "epoch": 1.7443469665142715, + "grad_norm": 1.4606627623109902, + "learning_rate": 8.857150770612288e-07, + "loss": 0.2834109365940094, + "step": 3530 + }, + { + "epoch": 1.7448412208081057, + "grad_norm": 1.3959930901293698, + "learning_rate": 8.823562600966962e-07, + "loss": 0.2546151876449585, + "step": 3531 + }, + { + "epoch": 1.7453354751019399, + "grad_norm": 1.3410984246991777, + "learning_rate": 8.790035299408494e-07, + "loss": 0.2654607594013214, + "step": 3532 + }, + { + "epoch": 1.745829729395774, + "grad_norm": 1.4773453802832905, + "learning_rate": 8.756568888319239e-07, + "loss": 0.2720295786857605, + "step": 3533 + }, + { + "epoch": 1.7463239836896083, + "grad_norm": 1.3341271298777078, + "learning_rate": 8.723163390040856e-07, + "loss": 0.22259725630283356, + "step": 3534 + }, + { + "epoch": 1.7468182379834425, + "grad_norm": 1.3952830917524783, + "learning_rate": 
8.68981882687443e-07, + "loss": 0.22918277978897095, + "step": 3535 + }, + { + "epoch": 1.7473124922772767, + "grad_norm": 1.4553860122555766, + "learning_rate": 8.656535221080297e-07, + "loss": 0.24396009743213654, + "step": 3536 + }, + { + "epoch": 1.747806746571111, + "grad_norm": 1.4530449395488945, + "learning_rate": 8.623312594878097e-07, + "loss": 0.2370900958776474, + "step": 3537 + }, + { + "epoch": 1.748301000864945, + "grad_norm": 1.4353409191789361, + "learning_rate": 8.590150970446798e-07, + "loss": 0.2785671055316925, + "step": 3538 + }, + { + "epoch": 1.7487952551587793, + "grad_norm": 1.3531168663907844, + "learning_rate": 8.557050369924624e-07, + "loss": 0.29365241527557373, + "step": 3539 + }, + { + "epoch": 1.7492895094526135, + "grad_norm": 1.3579124483240532, + "learning_rate": 8.524010815409068e-07, + "loss": 0.24052876234054565, + "step": 3540 + }, + { + "epoch": 1.7497837637464475, + "grad_norm": 1.5096531715278536, + "learning_rate": 8.49103232895685e-07, + "loss": 0.23938694596290588, + "step": 3541 + }, + { + "epoch": 1.7502780180402817, + "grad_norm": 1.2842245856075563, + "learning_rate": 8.458114932583961e-07, + "loss": 0.2244144231081009, + "step": 3542 + }, + { + "epoch": 1.750772272334116, + "grad_norm": 1.4659940645429403, + "learning_rate": 8.425258648265544e-07, + "loss": 0.25028878450393677, + "step": 3543 + }, + { + "epoch": 1.7512665266279501, + "grad_norm": 1.414718407414415, + "learning_rate": 8.39246349793602e-07, + "loss": 0.23135274648666382, + "step": 3544 + }, + { + "epoch": 1.751760780921784, + "grad_norm": 1.3004631081596045, + "learning_rate": 8.359729503488967e-07, + "loss": 0.23874548077583313, + "step": 3545 + }, + { + "epoch": 1.7522550352156183, + "grad_norm": 1.4912661633646227, + "learning_rate": 8.327056686777102e-07, + "loss": 0.2780659794807434, + "step": 3546 + }, + { + "epoch": 1.7527492895094525, + "grad_norm": 1.3424848463452685, + "learning_rate": 8.294445069612356e-07, + "loss": 0.213335320353508, + 
"step": 3547 + }, + { + "epoch": 1.7532435438032867, + "grad_norm": 1.3764395925344186, + "learning_rate": 8.261894673765757e-07, + "loss": 0.23284730315208435, + "step": 3548 + }, + { + "epoch": 1.753737798097121, + "grad_norm": 1.4152912967440003, + "learning_rate": 8.229405520967504e-07, + "loss": 0.25429633259773254, + "step": 3549 + }, + { + "epoch": 1.7542320523909551, + "grad_norm": 1.42166486412748, + "learning_rate": 8.196977632906877e-07, + "loss": 0.2519379258155823, + "step": 3550 + }, + { + "epoch": 1.7547263066847893, + "grad_norm": 1.3397514660513317, + "learning_rate": 8.164611031232283e-07, + "loss": 0.2510948181152344, + "step": 3551 + }, + { + "epoch": 1.7552205609786236, + "grad_norm": 1.4391737307664527, + "learning_rate": 8.132305737551193e-07, + "loss": 0.27415433526039124, + "step": 3552 + }, + { + "epoch": 1.7557148152724578, + "grad_norm": 1.4503824956137814, + "learning_rate": 8.100061773430179e-07, + "loss": 0.26723912358283997, + "step": 3553 + }, + { + "epoch": 1.756209069566292, + "grad_norm": 1.3305646078685684, + "learning_rate": 8.067879160394821e-07, + "loss": 0.2710701823234558, + "step": 3554 + }, + { + "epoch": 1.7567033238601262, + "grad_norm": 1.2981752509304552, + "learning_rate": 8.035757919929765e-07, + "loss": 0.23247234523296356, + "step": 3555 + }, + { + "epoch": 1.7571975781539602, + "grad_norm": 1.3788336069912301, + "learning_rate": 8.003698073478749e-07, + "loss": 0.2514559328556061, + "step": 3556 + }, + { + "epoch": 1.7576918324477944, + "grad_norm": 1.2669691261364102, + "learning_rate": 7.971699642444419e-07, + "loss": 0.23549199104309082, + "step": 3557 + }, + { + "epoch": 1.7581860867416286, + "grad_norm": 1.326325870924157, + "learning_rate": 7.939762648188476e-07, + "loss": 0.24511446058750153, + "step": 3558 + }, + { + "epoch": 1.7586803410354628, + "grad_norm": 1.244030857989509, + "learning_rate": 7.907887112031609e-07, + "loss": 0.18705075979232788, + "step": 3559 + }, + { + "epoch": 1.7591745953292968, 
+ "grad_norm": 1.3163815425830492, + "learning_rate": 7.876073055253474e-07, + "loss": 0.24297048151493073, + "step": 3560 + }, + { + "epoch": 1.759668849623131, + "grad_norm": 1.3886968971610452, + "learning_rate": 7.844320499092683e-07, + "loss": 0.239119753241539, + "step": 3561 + }, + { + "epoch": 1.7601631039169652, + "grad_norm": 1.3716161630664097, + "learning_rate": 7.81262946474679e-07, + "loss": 0.2430122196674347, + "step": 3562 + }, + { + "epoch": 1.7606573582107994, + "grad_norm": 1.5018987096099226, + "learning_rate": 7.78099997337225e-07, + "loss": 0.2785049378871918, + "step": 3563 + }, + { + "epoch": 1.7611516125046336, + "grad_norm": 1.324774124882076, + "learning_rate": 7.749432046084471e-07, + "loss": 0.2451494038105011, + "step": 3564 + }, + { + "epoch": 1.7616458667984678, + "grad_norm": 1.2759037312949375, + "learning_rate": 7.717925703957785e-07, + "loss": 0.20071648061275482, + "step": 3565 + }, + { + "epoch": 1.762140121092302, + "grad_norm": 1.265455917769001, + "learning_rate": 7.686480968025333e-07, + "loss": 0.22308245301246643, + "step": 3566 + }, + { + "epoch": 1.7626343753861362, + "grad_norm": 1.4753453520092665, + "learning_rate": 7.655097859279192e-07, + "loss": 0.26082009077072144, + "step": 3567 + }, + { + "epoch": 1.7631286296799704, + "grad_norm": 1.2035646972809244, + "learning_rate": 7.623776398670268e-07, + "loss": 0.21026611328125, + "step": 3568 + }, + { + "epoch": 1.7636228839738046, + "grad_norm": 1.3616311603644673, + "learning_rate": 7.592516607108324e-07, + "loss": 0.23878465592861176, + "step": 3569 + }, + { + "epoch": 1.7641171382676388, + "grad_norm": 1.4512524044419246, + "learning_rate": 7.561318505461956e-07, + "loss": 0.30288150906562805, + "step": 3570 + }, + { + "epoch": 1.764611392561473, + "grad_norm": 1.3464088406966324, + "learning_rate": 7.530182114558582e-07, + "loss": 0.25749915838241577, + "step": 3571 + }, + { + "epoch": 1.765105646855307, + "grad_norm": 1.4850779133681176, + "learning_rate": 
7.499107455184351e-07, + "loss": 0.23799163103103638, + "step": 3572 + }, + { + "epoch": 1.7655999011491412, + "grad_norm": 1.2970926183891958, + "learning_rate": 7.46809454808436e-07, + "loss": 0.2626670002937317, + "step": 3573 + }, + { + "epoch": 1.7660941554429754, + "grad_norm": 1.4394447645143165, + "learning_rate": 7.437143413962299e-07, + "loss": 0.23273026943206787, + "step": 3574 + }, + { + "epoch": 1.7665884097368096, + "grad_norm": 1.329151714167698, + "learning_rate": 7.406254073480735e-07, + "loss": 0.22592151165008545, + "step": 3575 + }, + { + "epoch": 1.7670826640306436, + "grad_norm": 1.4000212660765223, + "learning_rate": 7.375426547260944e-07, + "loss": 0.2594859004020691, + "step": 3576 + }, + { + "epoch": 1.7675769183244778, + "grad_norm": 1.2114788921542652, + "learning_rate": 7.344660855882946e-07, + "loss": 0.2161571979522705, + "step": 3577 + }, + { + "epoch": 1.768071172618312, + "grad_norm": 1.2669666342048183, + "learning_rate": 7.313957019885487e-07, + "loss": 0.23052990436553955, + "step": 3578 + }, + { + "epoch": 1.7685654269121462, + "grad_norm": 1.2921856609362714, + "learning_rate": 7.283315059766005e-07, + "loss": 0.2309163510799408, + "step": 3579 + }, + { + "epoch": 1.7690596812059804, + "grad_norm": 1.3800150012724666, + "learning_rate": 7.252734995980604e-07, + "loss": 0.24543863534927368, + "step": 3580 + }, + { + "epoch": 1.7695539354998147, + "grad_norm": 1.315509052214176, + "learning_rate": 7.22221684894413e-07, + "loss": 0.27616050839424133, + "step": 3581 + }, + { + "epoch": 1.7700481897936489, + "grad_norm": 1.5849292816622715, + "learning_rate": 7.191760639030077e-07, + "loss": 0.2247719019651413, + "step": 3582 + }, + { + "epoch": 1.770542444087483, + "grad_norm": 1.3600242028973613, + "learning_rate": 7.161366386570545e-07, + "loss": 0.28721702098846436, + "step": 3583 + }, + { + "epoch": 1.7710366983813173, + "grad_norm": 1.3444976293289765, + "learning_rate": 7.131034111856294e-07, + "loss": 0.24191290140151978, 
+ "step": 3584 + }, + { + "epoch": 1.7715309526751515, + "grad_norm": 1.3549546462173616, + "learning_rate": 7.100763835136748e-07, + "loss": 0.24049970507621765, + "step": 3585 + }, + { + "epoch": 1.7720252069689857, + "grad_norm": 1.4855378384649431, + "learning_rate": 7.070555576619887e-07, + "loss": 0.255404531955719, + "step": 3586 + }, + { + "epoch": 1.7725194612628197, + "grad_norm": 1.3672964019576628, + "learning_rate": 7.040409356472333e-07, + "loss": 0.23041129112243652, + "step": 3587 + }, + { + "epoch": 1.7730137155566539, + "grad_norm": 1.3790812567511086, + "learning_rate": 7.010325194819278e-07, + "loss": 0.2589847147464752, + "step": 3588 + }, + { + "epoch": 1.773507969850488, + "grad_norm": 1.4114272066031652, + "learning_rate": 6.980303111744424e-07, + "loss": 0.2604563236236572, + "step": 3589 + }, + { + "epoch": 1.7740022241443223, + "grad_norm": 1.3786249354000182, + "learning_rate": 6.950343127290138e-07, + "loss": 0.26831385493278503, + "step": 3590 + }, + { + "epoch": 1.7744964784381563, + "grad_norm": 1.3398044201914234, + "learning_rate": 6.920445261457276e-07, + "loss": 0.20475032925605774, + "step": 3591 + }, + { + "epoch": 1.7749907327319905, + "grad_norm": 1.669693479578031, + "learning_rate": 6.890609534205206e-07, + "loss": 0.32378682494163513, + "step": 3592 + }, + { + "epoch": 1.7754849870258247, + "grad_norm": 1.4433175991642826, + "learning_rate": 6.86083596545184e-07, + "loss": 0.2526070177555084, + "step": 3593 + }, + { + "epoch": 1.775979241319659, + "grad_norm": 1.3738645357999373, + "learning_rate": 6.831124575073578e-07, + "loss": 0.2467537820339203, + "step": 3594 + }, + { + "epoch": 1.776473495613493, + "grad_norm": 1.4660741149631984, + "learning_rate": 6.801475382905332e-07, + "loss": 0.2857215404510498, + "step": 3595 + }, + { + "epoch": 1.7769677499073273, + "grad_norm": 1.4443968381596262, + "learning_rate": 6.771888408740479e-07, + "loss": 0.23615087568759918, + "step": 3596 + }, + { + "epoch": 1.7774620042011615, 
+ "grad_norm": 1.451390021672748, + "learning_rate": 6.742363672330854e-07, + "loss": 0.2613365054130554, + "step": 3597 + }, + { + "epoch": 1.7779562584949957, + "grad_norm": 1.465141872886975, + "learning_rate": 6.712901193386756e-07, + "loss": 0.2558417320251465, + "step": 3598 + }, + { + "epoch": 1.77845051278883, + "grad_norm": 1.4467371641088191, + "learning_rate": 6.683500991576919e-07, + "loss": 0.2683117091655731, + "step": 3599 + }, + { + "epoch": 1.7789447670826641, + "grad_norm": 1.4625204738144366, + "learning_rate": 6.654163086528487e-07, + "loss": 0.2546064555644989, + "step": 3600 + }, + { + "epoch": 1.7794390213764983, + "grad_norm": 1.5872307428555623, + "learning_rate": 6.624887497827004e-07, + "loss": 0.2683906555175781, + "step": 3601 + }, + { + "epoch": 1.7799332756703325, + "grad_norm": 1.363900663564542, + "learning_rate": 6.595674245016492e-07, + "loss": 0.23260846734046936, + "step": 3602 + }, + { + "epoch": 1.7804275299641665, + "grad_norm": 1.3840728964244504, + "learning_rate": 6.566523347599252e-07, + "loss": 0.22884608805179596, + "step": 3603 + }, + { + "epoch": 1.7809217842580007, + "grad_norm": 1.3583647776279095, + "learning_rate": 6.537434825036027e-07, + "loss": 0.24236485362052917, + "step": 3604 + }, + { + "epoch": 1.781416038551835, + "grad_norm": 1.4869775379128283, + "learning_rate": 6.508408696745893e-07, + "loss": 0.29543957114219666, + "step": 3605 + }, + { + "epoch": 1.781910292845669, + "grad_norm": 1.3626399619539873, + "learning_rate": 6.479444982106276e-07, + "loss": 0.24011383950710297, + "step": 3606 + }, + { + "epoch": 1.7824045471395031, + "grad_norm": 1.3135116984072812, + "learning_rate": 6.450543700452949e-07, + "loss": 0.248407244682312, + "step": 3607 + }, + { + "epoch": 1.7828988014333373, + "grad_norm": 1.4089475770026854, + "learning_rate": 6.421704871080004e-07, + "loss": 0.2405746728181839, + "step": 3608 + }, + { + "epoch": 1.7833930557271716, + "grad_norm": 1.2522903384339197, + "learning_rate": 
6.392928513239804e-07, + "loss": 0.24601790308952332, + "step": 3609 + }, + { + "epoch": 1.7838873100210058, + "grad_norm": 1.2436557177887422, + "learning_rate": 6.36421464614303e-07, + "loss": 0.20030242204666138, + "step": 3610 + }, + { + "epoch": 1.78438156431484, + "grad_norm": 1.3296983724782687, + "learning_rate": 6.335563288958691e-07, + "loss": 0.23858311772346497, + "step": 3611 + }, + { + "epoch": 1.7848758186086742, + "grad_norm": 1.4392435044249465, + "learning_rate": 6.306974460813986e-07, + "loss": 0.2330242097377777, + "step": 3612 + }, + { + "epoch": 1.7853700729025084, + "grad_norm": 1.445863340067418, + "learning_rate": 6.278448180794416e-07, + "loss": 0.25513261556625366, + "step": 3613 + }, + { + "epoch": 1.7858643271963426, + "grad_norm": 1.3248647587522469, + "learning_rate": 6.249984467943737e-07, + "loss": 0.2298405021429062, + "step": 3614 + }, + { + "epoch": 1.7863585814901768, + "grad_norm": 1.3090685428520892, + "learning_rate": 6.221583341263893e-07, + "loss": 0.22120623290538788, + "step": 3615 + }, + { + "epoch": 1.786852835784011, + "grad_norm": 1.3392765156774626, + "learning_rate": 6.193244819715072e-07, + "loss": 0.26976969838142395, + "step": 3616 + }, + { + "epoch": 1.7873470900778452, + "grad_norm": 1.3657180436845977, + "learning_rate": 6.164968922215697e-07, + "loss": 0.24354586005210876, + "step": 3617 + }, + { + "epoch": 1.7878413443716792, + "grad_norm": 1.4254233164600292, + "learning_rate": 6.136755667642302e-07, + "loss": 0.2849498689174652, + "step": 3618 + }, + { + "epoch": 1.7883355986655134, + "grad_norm": 1.2708453781613391, + "learning_rate": 6.10860507482971e-07, + "loss": 0.2431584596633911, + "step": 3619 + }, + { + "epoch": 1.7888298529593476, + "grad_norm": 1.5031154285158648, + "learning_rate": 6.080517162570809e-07, + "loss": 0.2384781688451767, + "step": 3620 + }, + { + "epoch": 1.7893241072531818, + "grad_norm": 1.45686854578023, + "learning_rate": 6.052491949616712e-07, + "loss": 0.23782339692115784, + 
"step": 3621 + }, + { + "epoch": 1.7898183615470158, + "grad_norm": 1.342733882676876, + "learning_rate": 6.024529454676631e-07, + "loss": 0.23293447494506836, + "step": 3622 + }, + { + "epoch": 1.79031261584085, + "grad_norm": 1.2930495337650696, + "learning_rate": 5.996629696417955e-07, + "loss": 0.21202662587165833, + "step": 3623 + }, + { + "epoch": 1.7908068701346842, + "grad_norm": 1.5889243123202152, + "learning_rate": 5.968792693466141e-07, + "loss": 0.27971768379211426, + "step": 3624 + }, + { + "epoch": 1.7913011244285184, + "grad_norm": 1.441999540970622, + "learning_rate": 5.94101846440478e-07, + "loss": 0.2433638721704483, + "step": 3625 + }, + { + "epoch": 1.7917953787223526, + "grad_norm": 1.3682285780053611, + "learning_rate": 5.91330702777555e-07, + "loss": 0.21812602877616882, + "step": 3626 + }, + { + "epoch": 1.7922896330161868, + "grad_norm": 1.924541384200403, + "learning_rate": 5.88565840207822e-07, + "loss": 0.2135028839111328, + "step": 3627 + }, + { + "epoch": 1.792783887310021, + "grad_norm": 1.3226125497456243, + "learning_rate": 5.858072605770626e-07, + "loss": 0.23919226229190826, + "step": 3628 + }, + { + "epoch": 1.7932781416038552, + "grad_norm": 1.3008122554752455, + "learning_rate": 5.830549657268614e-07, + "loss": 0.2495008111000061, + "step": 3629 + }, + { + "epoch": 1.7937723958976894, + "grad_norm": 1.4679589100669386, + "learning_rate": 5.80308957494613e-07, + "loss": 0.2531805634498596, + "step": 3630 + }, + { + "epoch": 1.7942666501915236, + "grad_norm": 1.2654762717037664, + "learning_rate": 5.775692377135156e-07, + "loss": 0.22644619643688202, + "step": 3631 + }, + { + "epoch": 1.7947609044853579, + "grad_norm": 1.2567004368149646, + "learning_rate": 5.748358082125638e-07, + "loss": 0.2264411598443985, + "step": 3632 + }, + { + "epoch": 1.7952551587791918, + "grad_norm": 1.3206987713043599, + "learning_rate": 5.721086708165568e-07, + "loss": 0.2663921117782593, + "step": 3633 + }, + { + "epoch": 1.795749413073026, + 
"grad_norm": 1.35703763331278, + "learning_rate": 5.693878273460951e-07, + "loss": 0.2398051619529724, + "step": 3634 + }, + { + "epoch": 1.7962436673668603, + "grad_norm": 1.4184943078470147, + "learning_rate": 5.6667327961757e-07, + "loss": 0.28781580924987793, + "step": 3635 + }, + { + "epoch": 1.7967379216606945, + "grad_norm": 2.1761368991988084, + "learning_rate": 5.639650294431787e-07, + "loss": 0.2232055813074112, + "step": 3636 + }, + { + "epoch": 1.7972321759545284, + "grad_norm": 1.402577073030083, + "learning_rate": 5.612630786309103e-07, + "loss": 0.23214340209960938, + "step": 3637 + }, + { + "epoch": 1.7977264302483627, + "grad_norm": 1.2714718799747338, + "learning_rate": 5.585674289845467e-07, + "loss": 0.21598659455776215, + "step": 3638 + }, + { + "epoch": 1.7982206845421969, + "grad_norm": 1.351029180109128, + "learning_rate": 5.558780823036658e-07, + "loss": 0.2760176956653595, + "step": 3639 + }, + { + "epoch": 1.798714938836031, + "grad_norm": 1.3941723061811673, + "learning_rate": 5.531950403836373e-07, + "loss": 0.2641429901123047, + "step": 3640 + }, + { + "epoch": 1.7992091931298653, + "grad_norm": 1.390874465362023, + "learning_rate": 5.505183050156204e-07, + "loss": 0.2407502382993698, + "step": 3641 + }, + { + "epoch": 1.7997034474236995, + "grad_norm": 1.2164247841450622, + "learning_rate": 5.478478779865682e-07, + "loss": 0.19910940527915955, + "step": 3642 + }, + { + "epoch": 1.8001977017175337, + "grad_norm": 1.4412656091937792, + "learning_rate": 5.451837610792166e-07, + "loss": 0.2716234624385834, + "step": 3643 + }, + { + "epoch": 1.800691956011368, + "grad_norm": 1.3284477963142056, + "learning_rate": 5.42525956072093e-07, + "loss": 0.2784198224544525, + "step": 3644 + }, + { + "epoch": 1.801186210305202, + "grad_norm": 1.3444314874013155, + "learning_rate": 5.398744647395104e-07, + "loss": 0.2277904599905014, + "step": 3645 + }, + { + "epoch": 1.8016804645990363, + "grad_norm": 1.4299842617414134, + "learning_rate": 
5.372292888515684e-07, + "loss": 0.26788002252578735, + "step": 3646 + }, + { + "epoch": 1.8021747188928705, + "grad_norm": 1.3607541160674654, + "learning_rate": 5.345904301741445e-07, + "loss": 0.22452175617218018, + "step": 3647 + }, + { + "epoch": 1.8026689731867047, + "grad_norm": 1.44450101040719, + "learning_rate": 5.319578904689071e-07, + "loss": 0.2337179332971573, + "step": 3648 + }, + { + "epoch": 1.8031632274805387, + "grad_norm": 1.3116281040368842, + "learning_rate": 5.293316714932983e-07, + "loss": 0.2614130973815918, + "step": 3649 + }, + { + "epoch": 1.803657481774373, + "grad_norm": 1.3142722561763884, + "learning_rate": 5.267117750005468e-07, + "loss": 0.2577320635318756, + "step": 3650 + }, + { + "epoch": 1.8041517360682071, + "grad_norm": 1.231846526151871, + "learning_rate": 5.24098202739658e-07, + "loss": 0.2058672308921814, + "step": 3651 + }, + { + "epoch": 1.8046459903620413, + "grad_norm": 1.3970882237865128, + "learning_rate": 5.214909564554138e-07, + "loss": 0.25223514437675476, + "step": 3652 + }, + { + "epoch": 1.8051402446558753, + "grad_norm": 1.3683940041570406, + "learning_rate": 5.188900378883765e-07, + "loss": 0.25651872158050537, + "step": 3653 + }, + { + "epoch": 1.8056344989497095, + "grad_norm": 1.3167902113360206, + "learning_rate": 5.162954487748828e-07, + "loss": 0.257855623960495, + "step": 3654 + }, + { + "epoch": 1.8061287532435437, + "grad_norm": 1.3408137381423195, + "learning_rate": 5.137071908470381e-07, + "loss": 0.22942093014717102, + "step": 3655 + }, + { + "epoch": 1.806623007537378, + "grad_norm": 1.3905585042591802, + "learning_rate": 5.111252658327326e-07, + "loss": 0.25629153847694397, + "step": 3656 + }, + { + "epoch": 1.8071172618312121, + "grad_norm": 1.3417957205977868, + "learning_rate": 5.085496754556207e-07, + "loss": 0.23882299661636353, + "step": 3657 + }, + { + "epoch": 1.8076115161250463, + "grad_norm": 1.3092883951034957, + "learning_rate": 5.059804214351283e-07, + "loss": 0.2323160469532013, + 
"step": 3658 + }, + { + "epoch": 1.8081057704188805, + "grad_norm": 1.318607555394289, + "learning_rate": 5.034175054864531e-07, + "loss": 0.2080869972705841, + "step": 3659 + }, + { + "epoch": 1.8086000247127147, + "grad_norm": 1.476319660825777, + "learning_rate": 5.008609293205624e-07, + "loss": 0.22439511120319366, + "step": 3660 + }, + { + "epoch": 1.809094279006549, + "grad_norm": 1.3639928518895943, + "learning_rate": 4.983106946441885e-07, + "loss": 0.2527809739112854, + "step": 3661 + }, + { + "epoch": 1.8095885333003832, + "grad_norm": 1.181172468164539, + "learning_rate": 4.957668031598328e-07, + "loss": 0.2149294763803482, + "step": 3662 + }, + { + "epoch": 1.8100827875942174, + "grad_norm": 1.3244234520799762, + "learning_rate": 4.932292565657615e-07, + "loss": 0.2471565306186676, + "step": 3663 + }, + { + "epoch": 1.8105770418880514, + "grad_norm": 1.328701941509414, + "learning_rate": 4.906980565560004e-07, + "loss": 0.25820282101631165, + "step": 3664 + }, + { + "epoch": 1.8110712961818856, + "grad_norm": 1.4538113944792308, + "learning_rate": 4.881732048203469e-07, + "loss": 0.2815645933151245, + "step": 3665 + }, + { + "epoch": 1.8115655504757198, + "grad_norm": 1.4078938194960222, + "learning_rate": 4.856547030443559e-07, + "loss": 0.23443330824375153, + "step": 3666 + }, + { + "epoch": 1.812059804769554, + "grad_norm": 1.413689966723704, + "learning_rate": 4.831425529093403e-07, + "loss": 0.2452373206615448, + "step": 3667 + }, + { + "epoch": 1.812554059063388, + "grad_norm": 1.2405057526282826, + "learning_rate": 4.806367560923764e-07, + "loss": 0.21815839409828186, + "step": 3668 + }, + { + "epoch": 1.8130483133572222, + "grad_norm": 1.3418751770168684, + "learning_rate": 4.781373142663003e-07, + "loss": 0.23436316847801208, + "step": 3669 + }, + { + "epoch": 1.8135425676510564, + "grad_norm": 1.277189547676361, + "learning_rate": 4.75644229099701e-07, + "loss": 0.18917132914066315, + "step": 3670 + }, + { + "epoch": 1.8140368219448906, + 
"grad_norm": 1.3842801505047626, + "learning_rate": 4.7315750225692905e-07, + "loss": 0.24570351839065552, + "step": 3671 + }, + { + "epoch": 1.8145310762387248, + "grad_norm": 1.2514343072057177, + "learning_rate": 4.7067713539808543e-07, + "loss": 0.23367956280708313, + "step": 3672 + }, + { + "epoch": 1.815025330532559, + "grad_norm": 1.372723501995688, + "learning_rate": 4.682031301790291e-07, + "loss": 0.24563322961330414, + "step": 3673 + }, + { + "epoch": 1.8155195848263932, + "grad_norm": 1.3552399849082646, + "learning_rate": 4.6573548825137204e-07, + "loss": 0.2425815761089325, + "step": 3674 + }, + { + "epoch": 1.8160138391202274, + "grad_norm": 1.2732667032266225, + "learning_rate": 4.632742112624744e-07, + "loss": 0.2173803597688675, + "step": 3675 + }, + { + "epoch": 1.8165080934140616, + "grad_norm": 1.4674070434763509, + "learning_rate": 4.6081930085544734e-07, + "loss": 0.2665477395057678, + "step": 3676 + }, + { + "epoch": 1.8170023477078958, + "grad_norm": 1.2335396057121188, + "learning_rate": 4.5837075866915994e-07, + "loss": 0.23834756016731262, + "step": 3677 + }, + { + "epoch": 1.81749660200173, + "grad_norm": 1.3614176095599289, + "learning_rate": 4.55928586338219e-07, + "loss": 0.2479294240474701, + "step": 3678 + }, + { + "epoch": 1.8179908562955642, + "grad_norm": 1.370567608566195, + "learning_rate": 4.5349278549298716e-07, + "loss": 0.24136531352996826, + "step": 3679 + }, + { + "epoch": 1.8184851105893982, + "grad_norm": 1.3881148070094378, + "learning_rate": 4.510633577595669e-07, + "loss": 0.24397623538970947, + "step": 3680 + }, + { + "epoch": 1.8189793648832324, + "grad_norm": 1.3189259944629108, + "learning_rate": 4.48640304759812e-07, + "loss": 0.27078694105148315, + "step": 3681 + }, + { + "epoch": 1.8194736191770666, + "grad_norm": 1.5222352072420349, + "learning_rate": 4.4622362811131745e-07, + "loss": 0.2544251084327698, + "step": 3682 + }, + { + "epoch": 1.8199678734709008, + "grad_norm": 1.3696668102162666, + 
"learning_rate": 4.4381332942742384e-07, + "loss": 0.2528873682022095, + "step": 3683 + }, + { + "epoch": 1.8204621277647348, + "grad_norm": 1.470119432024013, + "learning_rate": 4.414094103172084e-07, + "loss": 0.25487592816352844, + "step": 3684 + }, + { + "epoch": 1.820956382058569, + "grad_norm": 1.3872878168023053, + "learning_rate": 4.3901187238549414e-07, + "loss": 0.22061187028884888, + "step": 3685 + }, + { + "epoch": 1.8214506363524032, + "grad_norm": 1.355863796177502, + "learning_rate": 4.366207172328452e-07, + "loss": 0.2793615758419037, + "step": 3686 + }, + { + "epoch": 1.8219448906462374, + "grad_norm": 1.2429295933181803, + "learning_rate": 4.342359464555612e-07, + "loss": 0.2323140949010849, + "step": 3687 + }, + { + "epoch": 1.8224391449400716, + "grad_norm": 1.370663497944958, + "learning_rate": 4.3185756164568104e-07, + "loss": 0.2616409659385681, + "step": 3688 + }, + { + "epoch": 1.8229333992339058, + "grad_norm": 1.3843956978002738, + "learning_rate": 4.294855643909812e-07, + "loss": 0.203874871134758, + "step": 3689 + }, + { + "epoch": 1.82342765352774, + "grad_norm": 1.2289114807067458, + "learning_rate": 4.271199562749717e-07, + "loss": 0.2272878736257553, + "step": 3690 + }, + { + "epoch": 1.8239219078215743, + "grad_norm": 1.338434972419624, + "learning_rate": 4.247607388769004e-07, + "loss": 0.23728047311306, + "step": 3691 + }, + { + "epoch": 1.8244161621154085, + "grad_norm": 1.4750745226923418, + "learning_rate": 4.2240791377174737e-07, + "loss": 0.2570911943912506, + "step": 3692 + }, + { + "epoch": 1.8249104164092427, + "grad_norm": 1.4969254471055817, + "learning_rate": 4.200614825302207e-07, + "loss": 0.24265727400779724, + "step": 3693 + }, + { + "epoch": 1.8254046707030769, + "grad_norm": 1.405819385173928, + "learning_rate": 4.177214467187707e-07, + "loss": 0.24822816252708435, + "step": 3694 + }, + { + "epoch": 1.8258989249969109, + "grad_norm": 1.3218266218091017, + "learning_rate": 4.153878078995677e-07, + "loss": 
0.23382046818733215, + "step": 3695 + }, + { + "epoch": 1.826393179290745, + "grad_norm": 1.4037010093048616, + "learning_rate": 4.130605676305166e-07, + "loss": 0.27590304613113403, + "step": 3696 + }, + { + "epoch": 1.8268874335845793, + "grad_norm": 1.4161501438852775, + "learning_rate": 4.1073972746525026e-07, + "loss": 0.25702038407325745, + "step": 3697 + }, + { + "epoch": 1.8273816878784135, + "grad_norm": 1.488627338365754, + "learning_rate": 4.0842528895312707e-07, + "loss": 0.28980135917663574, + "step": 3698 + }, + { + "epoch": 1.8278759421722475, + "grad_norm": 1.5075437506896323, + "learning_rate": 4.0611725363923435e-07, + "loss": 0.22739271819591522, + "step": 3699 + }, + { + "epoch": 1.8283701964660817, + "grad_norm": 1.4671495030162094, + "learning_rate": 4.038156230643853e-07, + "loss": 0.26396334171295166, + "step": 3700 + }, + { + "epoch": 1.8288644507599159, + "grad_norm": 1.5855861974203058, + "learning_rate": 4.015203987651106e-07, + "loss": 0.25548964738845825, + "step": 3701 + }, + { + "epoch": 1.82935870505375, + "grad_norm": 1.3315259515817186, + "learning_rate": 3.992315822736725e-07, + "loss": 0.22227105498313904, + "step": 3702 + }, + { + "epoch": 1.8298529593475843, + "grad_norm": 1.445413897274288, + "learning_rate": 3.969491751180543e-07, + "loss": 0.30854254961013794, + "step": 3703 + }, + { + "epoch": 1.8303472136414185, + "grad_norm": 1.4678349464130562, + "learning_rate": 3.946731788219538e-07, + "loss": 0.27471429109573364, + "step": 3704 + }, + { + "epoch": 1.8308414679352527, + "grad_norm": 1.334822235698922, + "learning_rate": 3.924035949047955e-07, + "loss": 0.2317768633365631, + "step": 3705 + }, + { + "epoch": 1.831335722229087, + "grad_norm": 1.4197098897896443, + "learning_rate": 3.901404248817231e-07, + "loss": 0.2450723946094513, + "step": 3706 + }, + { + "epoch": 1.8318299765229211, + "grad_norm": 1.4676009490842072, + "learning_rate": 3.878836702635935e-07, + "loss": 0.2428039014339447, + "step": 3707 + }, + { + 
"epoch": 1.8323242308167553, + "grad_norm": 1.4376208196933993, + "learning_rate": 3.856333325569861e-07, + "loss": 0.27869629859924316, + "step": 3708 + }, + { + "epoch": 1.8328184851105895, + "grad_norm": 1.2808253694997749, + "learning_rate": 3.8338941326419353e-07, + "loss": 0.21661749482154846, + "step": 3709 + }, + { + "epoch": 1.8333127394044237, + "grad_norm": 1.3452610575891626, + "learning_rate": 3.8115191388322206e-07, + "loss": 0.2655249834060669, + "step": 3710 + }, + { + "epoch": 1.8338069936982577, + "grad_norm": 1.3643896556477109, + "learning_rate": 3.7892083590779784e-07, + "loss": 0.2281903475522995, + "step": 3711 + }, + { + "epoch": 1.834301247992092, + "grad_norm": 1.492937654145658, + "learning_rate": 3.7669618082735504e-07, + "loss": 0.24545446038246155, + "step": 3712 + }, + { + "epoch": 1.8347955022859261, + "grad_norm": 1.2788794377367898, + "learning_rate": 3.7447795012704237e-07, + "loss": 0.24749556183815002, + "step": 3713 + }, + { + "epoch": 1.8352897565797601, + "grad_norm": 1.4606135919595513, + "learning_rate": 3.722661452877163e-07, + "loss": 0.26234689354896545, + "step": 3714 + }, + { + "epoch": 1.8357840108735943, + "grad_norm": 1.3697239858165842, + "learning_rate": 3.700607677859491e-07, + "loss": 0.21348389983177185, + "step": 3715 + }, + { + "epoch": 1.8362782651674285, + "grad_norm": 1.3198403259649356, + "learning_rate": 3.6786181909401864e-07, + "loss": 0.2527744770050049, + "step": 3716 + }, + { + "epoch": 1.8367725194612627, + "grad_norm": 1.3153305717810528, + "learning_rate": 3.6566930067991056e-07, + "loss": 0.2175026535987854, + "step": 3717 + }, + { + "epoch": 1.837266773755097, + "grad_norm": 1.3795015677920492, + "learning_rate": 3.6348321400731967e-07, + "loss": 0.2847272753715515, + "step": 3718 + }, + { + "epoch": 1.8377610280489312, + "grad_norm": 1.4885049894439106, + "learning_rate": 3.613035605356463e-07, + "loss": 0.2549072504043579, + "step": 3719 + }, + { + "epoch": 1.8382552823427654, + "grad_norm": 
1.3444222427486383, + "learning_rate": 3.591303417199965e-07, + "loss": 0.24534013867378235, + "step": 3720 + }, + { + "epoch": 1.8387495366365996, + "grad_norm": 1.461602538702394, + "learning_rate": 3.5696355901117865e-07, + "loss": 0.25336408615112305, + "step": 3721 + }, + { + "epoch": 1.8392437909304338, + "grad_norm": 1.4932038589381658, + "learning_rate": 3.548032138557056e-07, + "loss": 0.2787632346153259, + "step": 3722 + }, + { + "epoch": 1.839738045224268, + "grad_norm": 1.3687827308256, + "learning_rate": 3.5264930769579595e-07, + "loss": 0.22364875674247742, + "step": 3723 + }, + { + "epoch": 1.8402322995181022, + "grad_norm": 1.509493433022075, + "learning_rate": 3.5050184196936285e-07, + "loss": 0.2526230216026306, + "step": 3724 + }, + { + "epoch": 1.8407265538119364, + "grad_norm": 1.449998297788816, + "learning_rate": 3.483608181100262e-07, + "loss": 0.2412932962179184, + "step": 3725 + }, + { + "epoch": 1.8412208081057704, + "grad_norm": 1.4100243345912178, + "learning_rate": 3.462262375471026e-07, + "loss": 0.28693705797195435, + "step": 3726 + }, + { + "epoch": 1.8417150623996046, + "grad_norm": 1.4369299703462226, + "learning_rate": 3.4409810170560667e-07, + "loss": 0.2600281834602356, + "step": 3727 + }, + { + "epoch": 1.8422093166934388, + "grad_norm": 1.3702328145360616, + "learning_rate": 3.4197641200625185e-07, + "loss": 0.24885150790214539, + "step": 3728 + }, + { + "epoch": 1.842703570987273, + "grad_norm": 1.476451776245579, + "learning_rate": 3.398611698654497e-07, + "loss": 0.27185115218162537, + "step": 3729 + }, + { + "epoch": 1.843197825281107, + "grad_norm": 1.6779196665373166, + "learning_rate": 3.377523766953006e-07, + "loss": 0.2999323010444641, + "step": 3730 + }, + { + "epoch": 1.8436920795749412, + "grad_norm": 1.3755033406487114, + "learning_rate": 3.356500339036106e-07, + "loss": 0.22807806730270386, + "step": 3731 + }, + { + "epoch": 1.8441863338687754, + "grad_norm": 1.4727836521575108, + "learning_rate": 
3.3355414289387155e-07, + "loss": 0.23006726801395416, + "step": 3732 + }, + { + "epoch": 1.8446805881626096, + "grad_norm": 1.4892072813513704, + "learning_rate": 3.314647050652686e-07, + "loss": 0.25261276960372925, + "step": 3733 + }, + { + "epoch": 1.8451748424564438, + "grad_norm": 1.3741598151970273, + "learning_rate": 3.293817218126827e-07, + "loss": 0.2484148144721985, + "step": 3734 + }, + { + "epoch": 1.845669096750278, + "grad_norm": 1.2679669997107472, + "learning_rate": 3.273051945266836e-07, + "loss": 0.2472834438085556, + "step": 3735 + }, + { + "epoch": 1.8461633510441122, + "grad_norm": 1.16756829401485, + "learning_rate": 3.2523512459352923e-07, + "loss": 0.20510706305503845, + "step": 3736 + }, + { + "epoch": 1.8466576053379464, + "grad_norm": 1.292644423038628, + "learning_rate": 3.231715133951707e-07, + "loss": 0.2331993281841278, + "step": 3737 + }, + { + "epoch": 1.8471518596317806, + "grad_norm": 1.4584815860954135, + "learning_rate": 3.211143623092461e-07, + "loss": 0.2704228162765503, + "step": 3738 + }, + { + "epoch": 1.8476461139256148, + "grad_norm": 1.4579018041488718, + "learning_rate": 3.190636727090768e-07, + "loss": 0.2514714002609253, + "step": 3739 + }, + { + "epoch": 1.848140368219449, + "grad_norm": 1.258977256920419, + "learning_rate": 3.170194459636777e-07, + "loss": 0.2396089732646942, + "step": 3740 + }, + { + "epoch": 1.848634622513283, + "grad_norm": 1.4139144003983488, + "learning_rate": 3.149816834377428e-07, + "loss": 0.266484797000885, + "step": 3741 + }, + { + "epoch": 1.8491288768071172, + "grad_norm": 1.338105672337281, + "learning_rate": 3.129503864916539e-07, + "loss": 0.24549749493598938, + "step": 3742 + }, + { + "epoch": 1.8496231311009514, + "grad_norm": 1.6902480251834826, + "learning_rate": 3.1092555648147615e-07, + "loss": 0.2659090757369995, + "step": 3743 + }, + { + "epoch": 1.8501173853947857, + "grad_norm": 1.4018081288366548, + "learning_rate": 3.0890719475895615e-07, + "loss": 0.2756732702255249, + 
"step": 3744 + }, + { + "epoch": 1.8506116396886196, + "grad_norm": 1.3509953718874834, + "learning_rate": 3.068953026715238e-07, + "loss": 0.2568710148334503, + "step": 3745 + }, + { + "epoch": 1.8511058939824538, + "grad_norm": 1.3512798325752944, + "learning_rate": 3.048898815622914e-07, + "loss": 0.2255566120147705, + "step": 3746 + }, + { + "epoch": 1.851600148276288, + "grad_norm": 1.309385732750396, + "learning_rate": 3.028909327700458e-07, + "loss": 0.2083941102027893, + "step": 3747 + }, + { + "epoch": 1.8520944025701223, + "grad_norm": 1.2287507621351796, + "learning_rate": 3.0089845762926063e-07, + "loss": 0.20739290118217468, + "step": 3748 + }, + { + "epoch": 1.8525886568639565, + "grad_norm": 1.2356251229389228, + "learning_rate": 2.989124574700819e-07, + "loss": 0.21835210919380188, + "step": 3749 + }, + { + "epoch": 1.8530829111577907, + "grad_norm": 1.312598409351232, + "learning_rate": 2.969329336183335e-07, + "loss": 0.2170596569776535, + "step": 3750 + }, + { + "epoch": 1.8535771654516249, + "grad_norm": 1.3990932569701935, + "learning_rate": 2.949598873955184e-07, + "loss": 0.23584111034870148, + "step": 3751 + }, + { + "epoch": 1.854071419745459, + "grad_norm": 1.5531646127161125, + "learning_rate": 2.9299332011881623e-07, + "loss": 0.2690342664718628, + "step": 3752 + }, + { + "epoch": 1.8545656740392933, + "grad_norm": 1.2634424740078676, + "learning_rate": 2.9103323310107566e-07, + "loss": 0.2499091923236847, + "step": 3753 + }, + { + "epoch": 1.8550599283331275, + "grad_norm": 1.417744173198578, + "learning_rate": 2.8907962765082567e-07, + "loss": 0.23112377524375916, + "step": 3754 + }, + { + "epoch": 1.8555541826269617, + "grad_norm": 1.375590332914505, + "learning_rate": 2.8713250507226285e-07, + "loss": 0.25203657150268555, + "step": 3755 + }, + { + "epoch": 1.856048436920796, + "grad_norm": 1.4015552448571456, + "learning_rate": 2.8519186666526086e-07, + "loss": 0.2468508780002594, + "step": 3756 + }, + { + "epoch": 1.85654269121463, 
+ "grad_norm": 1.427563584784084, + "learning_rate": 2.8325771372536e-07, + "loss": 0.22745928168296814, + "step": 3757 + }, + { + "epoch": 1.857036945508464, + "grad_norm": 1.2932963376428803, + "learning_rate": 2.8133004754377525e-07, + "loss": 0.23090660572052002, + "step": 3758 + }, + { + "epoch": 1.8575311998022983, + "grad_norm": 1.420318152152914, + "learning_rate": 2.7940886940738707e-07, + "loss": 0.27513352036476135, + "step": 3759 + }, + { + "epoch": 1.8580254540961325, + "grad_norm": 1.4517333399175874, + "learning_rate": 2.774941805987474e-07, + "loss": 0.25791019201278687, + "step": 3760 + }, + { + "epoch": 1.8585197083899665, + "grad_norm": 1.523404531013776, + "learning_rate": 2.75585982396076e-07, + "loss": 0.2703961730003357, + "step": 3761 + }, + { + "epoch": 1.8590139626838007, + "grad_norm": 1.4198437134006967, + "learning_rate": 2.736842760732561e-07, + "loss": 0.2557608485221863, + "step": 3762 + }, + { + "epoch": 1.859508216977635, + "grad_norm": 1.4276231211370918, + "learning_rate": 2.717890628998421e-07, + "loss": 0.26276740431785583, + "step": 3763 + }, + { + "epoch": 1.8600024712714691, + "grad_norm": 1.3830597360775128, + "learning_rate": 2.699003441410508e-07, + "loss": 0.3033446967601776, + "step": 3764 + }, + { + "epoch": 1.8604967255653033, + "grad_norm": 1.3975518004533982, + "learning_rate": 2.680181210577637e-07, + "loss": 0.2513597905635834, + "step": 3765 + }, + { + "epoch": 1.8609909798591375, + "grad_norm": 1.2527716887935596, + "learning_rate": 2.661423949065267e-07, + "loss": 0.22935059666633606, + "step": 3766 + }, + { + "epoch": 1.8614852341529717, + "grad_norm": 1.5028347517247218, + "learning_rate": 2.6427316693954596e-07, + "loss": 0.2585369348526001, + "step": 3767 + }, + { + "epoch": 1.861979488446806, + "grad_norm": 1.4129565265857094, + "learning_rate": 2.6241043840469104e-07, + "loss": 0.25701645016670227, + "step": 3768 + }, + { + "epoch": 1.8624737427406401, + "grad_norm": 1.304405538262163, + "learning_rate": 
2.605542105454961e-07, + "loss": 0.24622182548046112, + "step": 3769 + }, + { + "epoch": 1.8629679970344744, + "grad_norm": 1.380891732165765, + "learning_rate": 2.5870448460114994e-07, + "loss": 0.2650758624076843, + "step": 3770 + }, + { + "epoch": 1.8634622513283086, + "grad_norm": 1.4721649336836553, + "learning_rate": 2.568612618065036e-07, + "loss": 0.2364269644021988, + "step": 3771 + }, + { + "epoch": 1.8639565056221425, + "grad_norm": 1.2217358212004363, + "learning_rate": 2.5502454339206617e-07, + "loss": 0.23226915299892426, + "step": 3772 + }, + { + "epoch": 1.8644507599159768, + "grad_norm": 1.3407554644381927, + "learning_rate": 2.5319433058400565e-07, + "loss": 0.23077306151390076, + "step": 3773 + }, + { + "epoch": 1.864945014209811, + "grad_norm": 1.289395146095016, + "learning_rate": 2.5137062460414476e-07, + "loss": 0.23707103729248047, + "step": 3774 + }, + { + "epoch": 1.8654392685036452, + "grad_norm": 1.3571808886592325, + "learning_rate": 2.4955342666996505e-07, + "loss": 0.268571138381958, + "step": 3775 + }, + { + "epoch": 1.8659335227974791, + "grad_norm": 1.4298616373621023, + "learning_rate": 2.4774273799459847e-07, + "loss": 0.21469517052173615, + "step": 3776 + }, + { + "epoch": 1.8664277770913134, + "grad_norm": 1.302386517113681, + "learning_rate": 2.45938559786838e-07, + "loss": 0.2513999938964844, + "step": 3777 + }, + { + "epoch": 1.8669220313851476, + "grad_norm": 1.2688339559395354, + "learning_rate": 2.44140893251128e-07, + "loss": 0.23660680651664734, + "step": 3778 + }, + { + "epoch": 1.8674162856789818, + "grad_norm": 1.499995655954345, + "learning_rate": 2.423497395875618e-07, + "loss": 0.24594557285308838, + "step": 3779 + }, + { + "epoch": 1.867910539972816, + "grad_norm": 1.4315211319459857, + "learning_rate": 2.405650999918896e-07, + "loss": 0.2725435793399811, + "step": 3780 + }, + { + "epoch": 1.8684047942666502, + "grad_norm": 1.3565937935517103, + "learning_rate": 2.3878697565551167e-07, + "loss": 
0.25718316435813904, + "step": 3781 + }, + { + "epoch": 1.8688990485604844, + "grad_norm": 1.3523272274009415, + "learning_rate": 2.3701536776547851e-07, + "loss": 0.2546181082725525, + "step": 3782 + }, + { + "epoch": 1.8693933028543186, + "grad_norm": 1.1875597307843324, + "learning_rate": 2.3525027750448959e-07, + "loss": 0.22146770358085632, + "step": 3783 + }, + { + "epoch": 1.8698875571481528, + "grad_norm": 1.5616036933474096, + "learning_rate": 2.3349170605089456e-07, + "loss": 0.23873519897460938, + "step": 3784 + }, + { + "epoch": 1.870381811441987, + "grad_norm": 1.3056198220614723, + "learning_rate": 2.3173965457868875e-07, + "loss": 0.2530808746814728, + "step": 3785 + }, + { + "epoch": 1.8708760657358212, + "grad_norm": 1.5174642956273923, + "learning_rate": 2.2999412425751987e-07, + "loss": 0.21616236865520477, + "step": 3786 + }, + { + "epoch": 1.8713703200296554, + "grad_norm": 1.3867713509711206, + "learning_rate": 2.2825511625267583e-07, + "loss": 0.21596969664096832, + "step": 3787 + }, + { + "epoch": 1.8718645743234894, + "grad_norm": 1.4557650561795843, + "learning_rate": 2.265226317250957e-07, + "loss": 0.25873616337776184, + "step": 3788 + }, + { + "epoch": 1.8723588286173236, + "grad_norm": 1.3108065941801126, + "learning_rate": 2.247966718313599e-07, + "loss": 0.21096865832805634, + "step": 3789 + }, + { + "epoch": 1.8728530829111578, + "grad_norm": 1.374596799099242, + "learning_rate": 2.230772377236956e-07, + "loss": 0.2159111499786377, + "step": 3790 + }, + { + "epoch": 1.8733473372049918, + "grad_norm": 1.3658642346441578, + "learning_rate": 2.213643305499724e-07, + "loss": 0.2264566719532013, + "step": 3791 + }, + { + "epoch": 1.873841591498826, + "grad_norm": 1.2529368730648867, + "learning_rate": 2.1965795145370338e-07, + "loss": 0.216034397482872, + "step": 3792 + }, + { + "epoch": 1.8743358457926602, + "grad_norm": 1.2144868387665828, + "learning_rate": 2.1795810157404063e-07, + "loss": 0.22257745265960693, + "step": 3793 + }, + { 
+ "epoch": 1.8748301000864944, + "grad_norm": 1.5075158608293073, + "learning_rate": 2.1626478204578082e-07, + "loss": 0.2569161653518677, + "step": 3794 + }, + { + "epoch": 1.8753243543803286, + "grad_norm": 1.3028902539101006, + "learning_rate": 2.1457799399936087e-07, + "loss": 0.24172556400299072, + "step": 3795 + }, + { + "epoch": 1.8758186086741628, + "grad_norm": 1.4100197142967315, + "learning_rate": 2.128977385608555e-07, + "loss": 0.25539106130599976, + "step": 3796 + }, + { + "epoch": 1.876312862967997, + "grad_norm": 1.3564195764364628, + "learning_rate": 2.1122401685197747e-07, + "loss": 0.23766650259494781, + "step": 3797 + }, + { + "epoch": 1.8768071172618312, + "grad_norm": 2.0847437292387516, + "learning_rate": 2.095568299900841e-07, + "loss": 0.24102288484573364, + "step": 3798 + }, + { + "epoch": 1.8773013715556655, + "grad_norm": 1.4163898812472968, + "learning_rate": 2.0789617908816063e-07, + "loss": 0.25168395042419434, + "step": 3799 + }, + { + "epoch": 1.8777956258494997, + "grad_norm": 1.2853968722580162, + "learning_rate": 2.0624206525483582e-07, + "loss": 0.23417149484157562, + "step": 3800 + }, + { + "epoch": 1.8782898801433339, + "grad_norm": 1.4002834822702614, + "learning_rate": 2.04594489594373e-07, + "loss": 0.2875264883041382, + "step": 3801 + }, + { + "epoch": 1.878784134437168, + "grad_norm": 1.3714454637927955, + "learning_rate": 2.0295345320667014e-07, + "loss": 0.24828693270683289, + "step": 3802 + }, + { + "epoch": 1.879278388731002, + "grad_norm": 1.3521250596424406, + "learning_rate": 2.013189571872587e-07, + "loss": 0.23279064893722534, + "step": 3803 + }, + { + "epoch": 1.8797726430248363, + "grad_norm": 1.1425181629308492, + "learning_rate": 1.996910026273058e-07, + "loss": 0.2099420577287674, + "step": 3804 + }, + { + "epoch": 1.8802668973186705, + "grad_norm": 1.346362344532125, + "learning_rate": 1.9806959061360985e-07, + "loss": 0.25043174624443054, + "step": 3805 + }, + { + "epoch": 1.8807611516125047, + 
"grad_norm": 1.3680517059526944, + "learning_rate": 1.9645472222860286e-07, + "loss": 0.2606011927127838, + "step": 3806 + }, + { + "epoch": 1.8812554059063387, + "grad_norm": 1.2606250431650987, + "learning_rate": 1.948463985503468e-07, + "loss": 0.22487565875053406, + "step": 3807 + }, + { + "epoch": 1.8817496602001729, + "grad_norm": 1.6823729371263936, + "learning_rate": 1.9324462065253735e-07, + "loss": 0.29611343145370483, + "step": 3808 + }, + { + "epoch": 1.882243914494007, + "grad_norm": 1.282763458334529, + "learning_rate": 1.9164938960449685e-07, + "loss": 0.2301706224679947, + "step": 3809 + }, + { + "epoch": 1.8827381687878413, + "grad_norm": 1.319243063789466, + "learning_rate": 1.9006070647118015e-07, + "loss": 0.2306794822216034, + "step": 3810 + }, + { + "epoch": 1.8832324230816755, + "grad_norm": 1.4208055299495237, + "learning_rate": 1.884785723131688e-07, + "loss": 0.2588786482810974, + "step": 3811 + }, + { + "epoch": 1.8837266773755097, + "grad_norm": 1.527285475263959, + "learning_rate": 1.8690298818667463e-07, + "loss": 0.2795346677303314, + "step": 3812 + }, + { + "epoch": 1.884220931669344, + "grad_norm": 1.2499989201376016, + "learning_rate": 1.853339551435318e-07, + "loss": 0.2313271164894104, + "step": 3813 + }, + { + "epoch": 1.884715185963178, + "grad_norm": 1.4803115521216077, + "learning_rate": 1.8377147423120467e-07, + "loss": 0.22814632952213287, + "step": 3814 + }, + { + "epoch": 1.8852094402570123, + "grad_norm": 1.3259243101199787, + "learning_rate": 1.822155464927866e-07, + "loss": 0.2605836093425751, + "step": 3815 + }, + { + "epoch": 1.8857036945508465, + "grad_norm": 1.3976508324913761, + "learning_rate": 1.8066617296699007e-07, + "loss": 0.23902952671051025, + "step": 3816 + }, + { + "epoch": 1.8861979488446807, + "grad_norm": 1.290435692515394, + "learning_rate": 1.7912335468815545e-07, + "loss": 0.24895761907100677, + "step": 3817 + }, + { + "epoch": 1.8866922031385147, + "grad_norm": 1.4446135232841222, + 
"learning_rate": 1.7758709268624664e-07, + "loss": 0.24108648300170898, + "step": 3818 + }, + { + "epoch": 1.887186457432349, + "grad_norm": 1.4071508146495701, + "learning_rate": 1.7605738798684767e-07, + "loss": 0.2600073516368866, + "step": 3819 + }, + { + "epoch": 1.8876807117261831, + "grad_norm": 1.3261487318829528, + "learning_rate": 1.745342416111706e-07, + "loss": 0.21564190089702606, + "step": 3820 + }, + { + "epoch": 1.8881749660200173, + "grad_norm": 1.4577577895280622, + "learning_rate": 1.7301765457604647e-07, + "loss": 0.24080556631088257, + "step": 3821 + }, + { + "epoch": 1.8886692203138513, + "grad_norm": 1.316642170468449, + "learning_rate": 1.7150762789392316e-07, + "loss": 0.22631056606769562, + "step": 3822 + }, + { + "epoch": 1.8891634746076855, + "grad_norm": 1.4341533325292704, + "learning_rate": 1.7000416257287654e-07, + "loss": 0.26355087757110596, + "step": 3823 + }, + { + "epoch": 1.8896577289015197, + "grad_norm": 1.387410149780388, + "learning_rate": 1.685072596165982e-07, + "loss": 0.248369500041008, + "step": 3824 + }, + { + "epoch": 1.890151983195354, + "grad_norm": 1.4331472853704903, + "learning_rate": 1.670169200243976e-07, + "loss": 0.2789249122142792, + "step": 3825 + }, + { + "epoch": 1.8906462374891881, + "grad_norm": 1.2052406993380367, + "learning_rate": 1.6553314479120453e-07, + "loss": 0.22493675351142883, + "step": 3826 + }, + { + "epoch": 1.8911404917830223, + "grad_norm": 1.2074956449276386, + "learning_rate": 1.6405593490756766e-07, + "loss": 0.21274074912071228, + "step": 3827 + }, + { + "epoch": 1.8916347460768566, + "grad_norm": 1.3986179942656674, + "learning_rate": 1.6258529135964928e-07, + "loss": 0.2591193914413452, + "step": 3828 + }, + { + "epoch": 1.8921290003706908, + "grad_norm": 1.5077061888652343, + "learning_rate": 1.6112121512923075e-07, + "loss": 0.2791387140750885, + "step": 3829 + }, + { + "epoch": 1.892623254664525, + "grad_norm": 1.449596307066075, + "learning_rate": 1.5966370719371015e-07, + 
"loss": 0.2840545177459717, + "step": 3830 + }, + { + "epoch": 1.8931175089583592, + "grad_norm": 1.538114321399184, + "learning_rate": 1.582127685260948e-07, + "loss": 0.2563555836677551, + "step": 3831 + }, + { + "epoch": 1.8936117632521934, + "grad_norm": 1.2897284655116197, + "learning_rate": 1.5676840009501538e-07, + "loss": 0.22912704944610596, + "step": 3832 + }, + { + "epoch": 1.8941060175460276, + "grad_norm": 1.3733822665309192, + "learning_rate": 1.5533060286470837e-07, + "loss": 0.25490787625312805, + "step": 3833 + }, + { + "epoch": 1.8946002718398616, + "grad_norm": 1.2282031018618578, + "learning_rate": 1.5389937779502818e-07, + "loss": 0.21826709806919098, + "step": 3834 + }, + { + "epoch": 1.8950945261336958, + "grad_norm": 1.303626845787231, + "learning_rate": 1.524747258414394e-07, + "loss": 0.2292749583721161, + "step": 3835 + }, + { + "epoch": 1.89558878042753, + "grad_norm": 1.3359905611934206, + "learning_rate": 1.5105664795501908e-07, + "loss": 0.24652332067489624, + "step": 3836 + }, + { + "epoch": 1.8960830347213642, + "grad_norm": 1.3777956922677133, + "learning_rate": 1.4964514508245652e-07, + "loss": 0.25154706835746765, + "step": 3837 + }, + { + "epoch": 1.8965772890151982, + "grad_norm": 1.3722697572324272, + "learning_rate": 1.482402181660525e-07, + "loss": 0.2414158582687378, + "step": 3838 + }, + { + "epoch": 1.8970715433090324, + "grad_norm": 1.3960215733148371, + "learning_rate": 1.4684186814371225e-07, + "loss": 0.22421908378601074, + "step": 3839 + }, + { + "epoch": 1.8975657976028666, + "grad_norm": 1.3337706977662172, + "learning_rate": 1.4545009594895687e-07, + "loss": 0.2506029009819031, + "step": 3840 + }, + { + "epoch": 1.8980600518967008, + "grad_norm": 1.239516400526973, + "learning_rate": 1.440649025109142e-07, + "loss": 0.2011726200580597, + "step": 3841 + }, + { + "epoch": 1.898554306190535, + "grad_norm": 1.5242598019660087, + "learning_rate": 1.4268628875431677e-07, + "loss": 0.27702796459198, + "step": 3842 + }, + 
{ + "epoch": 1.8990485604843692, + "grad_norm": 1.236260659855922, + "learning_rate": 1.413142555995095e-07, + "loss": 0.23884715139865875, + "step": 3843 + }, + { + "epoch": 1.8995428147782034, + "grad_norm": 1.2385068593263413, + "learning_rate": 1.3994880396244304e-07, + "loss": 0.2191702425479889, + "step": 3844 + }, + { + "epoch": 1.9000370690720376, + "grad_norm": 1.3532676134331167, + "learning_rate": 1.385899347546704e-07, + "loss": 0.25425833463668823, + "step": 3845 + }, + { + "epoch": 1.9005313233658718, + "grad_norm": 1.3452712776781028, + "learning_rate": 1.37237648883356e-07, + "loss": 0.23355990648269653, + "step": 3846 + }, + { + "epoch": 1.901025577659706, + "grad_norm": 1.200878562022238, + "learning_rate": 1.3589194725126542e-07, + "loss": 0.2079685628414154, + "step": 3847 + }, + { + "epoch": 1.9015198319535402, + "grad_norm": 1.380798956497921, + "learning_rate": 1.3455283075676895e-07, + "loss": 0.25126928091049194, + "step": 3848 + }, + { + "epoch": 1.9020140862473742, + "grad_norm": 1.3306751541769635, + "learning_rate": 1.332203002938437e-07, + "loss": 0.2608864903450012, + "step": 3849 + }, + { + "epoch": 1.9025083405412084, + "grad_norm": 1.3536846944777874, + "learning_rate": 1.3189435675206697e-07, + "loss": 0.27048414945602417, + "step": 3850 + }, + { + "epoch": 1.9030025948350426, + "grad_norm": 1.3873264194773522, + "learning_rate": 1.3057500101661846e-07, + "loss": 0.24350577592849731, + "step": 3851 + }, + { + "epoch": 1.9034968491288768, + "grad_norm": 1.5060374095399143, + "learning_rate": 1.2926223396828363e-07, + "loss": 0.23283880949020386, + "step": 3852 + }, + { + "epoch": 1.9039911034227108, + "grad_norm": 1.3722502195381412, + "learning_rate": 1.2795605648344477e-07, + "loss": 0.23332493007183075, + "step": 3853 + }, + { + "epoch": 1.904485357716545, + "grad_norm": 1.2805992535782373, + "learning_rate": 1.2665646943408882e-07, + "loss": 0.19833901524543762, + "step": 3854 + }, + { + "epoch": 1.9049796120103792, + 
"grad_norm": 1.316108497317141, + "learning_rate": 1.2536347368780066e-07, + "loss": 0.23650333285331726, + "step": 3855 + }, + { + "epoch": 1.9054738663042134, + "grad_norm": 1.1749486485284195, + "learning_rate": 1.240770701077665e-07, + "loss": 0.20151859521865845, + "step": 3856 + }, + { + "epoch": 1.9059681205980477, + "grad_norm": 1.4620220273758984, + "learning_rate": 1.2279725955277044e-07, + "loss": 0.32347559928894043, + "step": 3857 + }, + { + "epoch": 1.9064623748918819, + "grad_norm": 1.2726582104041342, + "learning_rate": 1.215240428771969e-07, + "loss": 0.25937923789024353, + "step": 3858 + }, + { + "epoch": 1.906956629185716, + "grad_norm": 1.6959402751075685, + "learning_rate": 1.2025742093102477e-07, + "loss": 0.2648822069168091, + "step": 3859 + }, + { + "epoch": 1.9074508834795503, + "grad_norm": 1.4639245582336404, + "learning_rate": 1.1899739455983327e-07, + "loss": 0.27612054347991943, + "step": 3860 + }, + { + "epoch": 1.9079451377733845, + "grad_norm": 1.32342317481008, + "learning_rate": 1.1774396460480064e-07, + "loss": 0.2204264998435974, + "step": 3861 + }, + { + "epoch": 1.9084393920672187, + "grad_norm": 1.4448526349141402, + "learning_rate": 1.164971319026964e-07, + "loss": 0.2719968557357788, + "step": 3862 + }, + { + "epoch": 1.908933646361053, + "grad_norm": 1.3288093626980793, + "learning_rate": 1.1525689728588807e-07, + "loss": 0.2308243364095688, + "step": 3863 + }, + { + "epoch": 1.909427900654887, + "grad_norm": 1.405242953564276, + "learning_rate": 1.1402326158234e-07, + "loss": 0.23281638324260712, + "step": 3864 + }, + { + "epoch": 1.909922154948721, + "grad_norm": 1.553800687505842, + "learning_rate": 1.127962256156101e-07, + "loss": 0.26273444294929504, + "step": 3865 + }, + { + "epoch": 1.9104164092425553, + "grad_norm": 1.3311046226223713, + "learning_rate": 1.1157579020484755e-07, + "loss": 0.26783496141433716, + "step": 3866 + }, + { + "epoch": 1.9109106635363895, + "grad_norm": 1.4482920311066827, + "learning_rate": 
1.1036195616480061e-07, + "loss": 0.2575075626373291, + "step": 3867 + }, + { + "epoch": 1.9114049178302237, + "grad_norm": 1.3313207733281058, + "learning_rate": 1.0915472430580443e-07, + "loss": 0.24802085757255554, + "step": 3868 + }, + { + "epoch": 1.9118991721240577, + "grad_norm": 1.230518560175702, + "learning_rate": 1.0795409543379099e-07, + "loss": 0.22017821669578552, + "step": 3869 + }, + { + "epoch": 1.912393426417892, + "grad_norm": 1.3804831257002024, + "learning_rate": 1.0676007035028579e-07, + "loss": 0.2525743246078491, + "step": 3870 + }, + { + "epoch": 1.912887680711726, + "grad_norm": 1.5674388988470875, + "learning_rate": 1.05572649852399e-07, + "loss": 0.26704782247543335, + "step": 3871 + }, + { + "epoch": 1.9133819350055603, + "grad_norm": 3.430480948746706, + "learning_rate": 1.0439183473283654e-07, + "loss": 0.25393134355545044, + "step": 3872 + }, + { + "epoch": 1.9138761892993945, + "grad_norm": 1.4465108879454651, + "learning_rate": 1.0321762577989448e-07, + "loss": 0.27266988158226013, + "step": 3873 + }, + { + "epoch": 1.9143704435932287, + "grad_norm": 1.366912603525092, + "learning_rate": 1.0205002377745799e-07, + "loss": 0.2694425582885742, + "step": 3874 + }, + { + "epoch": 1.914864697887063, + "grad_norm": 1.394500016346508, + "learning_rate": 1.0088902950500023e-07, + "loss": 0.28820598125457764, + "step": 3875 + }, + { + "epoch": 1.9153589521808971, + "grad_norm": 1.3050023577266547, + "learning_rate": 9.973464373758679e-08, + "loss": 0.2194051444530487, + "step": 3876 + }, + { + "epoch": 1.9158532064747313, + "grad_norm": 1.3831603392475145, + "learning_rate": 9.858686724586675e-08, + "loss": 0.25639402866363525, + "step": 3877 + }, + { + "epoch": 1.9163474607685655, + "grad_norm": 1.2744346736321277, + "learning_rate": 9.744570079608051e-08, + "loss": 0.23420584201812744, + "step": 3878 + }, + { + "epoch": 1.9168417150623998, + "grad_norm": 1.38639151316596, + "learning_rate": 9.631114515005425e-08, + "loss": 
0.2514578700065613, + "step": 3879 + }, + { + "epoch": 1.9173359693562337, + "grad_norm": 1.296540814966686, + "learning_rate": 9.518320106520096e-08, + "loss": 0.2223532646894455, + "step": 3880 + }, + { + "epoch": 1.917830223650068, + "grad_norm": 1.367450022954602, + "learning_rate": 9.406186929451943e-08, + "loss": 0.21725934743881226, + "step": 3881 + }, + { + "epoch": 1.9183244779439022, + "grad_norm": 1.2939049219304557, + "learning_rate": 9.294715058659531e-08, + "loss": 0.2081519365310669, + "step": 3882 + }, + { + "epoch": 1.9188187322377364, + "grad_norm": 1.4148048553245687, + "learning_rate": 9.183904568559998e-08, + "loss": 0.23683780431747437, + "step": 3883 + }, + { + "epoch": 1.9193129865315703, + "grad_norm": 1.3217345576155297, + "learning_rate": 9.073755533128725e-08, + "loss": 0.26095467805862427, + "step": 3884 + }, + { + "epoch": 1.9198072408254045, + "grad_norm": 1.253461281568054, + "learning_rate": 8.964268025899558e-08, + "loss": 0.24427568912506104, + "step": 3885 + }, + { + "epoch": 1.9203014951192388, + "grad_norm": 1.3603609343742546, + "learning_rate": 8.855442119964919e-08, + "loss": 0.23549365997314453, + "step": 3886 + }, + { + "epoch": 1.920795749413073, + "grad_norm": 1.4769071310965274, + "learning_rate": 8.74727788797547e-08, + "loss": 0.2645740807056427, + "step": 3887 + }, + { + "epoch": 1.9212900037069072, + "grad_norm": 1.3315198325383535, + "learning_rate": 8.639775402139894e-08, + "loss": 0.22890612483024597, + "step": 3888 + }, + { + "epoch": 1.9217842580007414, + "grad_norm": 1.4439303401955232, + "learning_rate": 8.532934734225451e-08, + "loss": 0.23417067527770996, + "step": 3889 + }, + { + "epoch": 1.9222785122945756, + "grad_norm": 1.3482339584478593, + "learning_rate": 8.42675595555753e-08, + "loss": 0.26125872135162354, + "step": 3890 + }, + { + "epoch": 1.9227727665884098, + "grad_norm": 1.4420298418522868, + "learning_rate": 8.321239137019433e-08, + "loss": 0.26559343934059143, + "step": 3891 + }, + { + 
"epoch": 1.923267020882244, + "grad_norm": 1.188066329993037, + "learning_rate": 8.216384349052809e-08, + "loss": 0.2033136785030365, + "step": 3892 + }, + { + "epoch": 1.9237612751760782, + "grad_norm": 1.975689815636208, + "learning_rate": 8.112191661656999e-08, + "loss": 0.2750868797302246, + "step": 3893 + }, + { + "epoch": 1.9242555294699124, + "grad_norm": 1.366292176712638, + "learning_rate": 8.008661144389807e-08, + "loss": 0.2082993984222412, + "step": 3894 + }, + { + "epoch": 1.9247497837637466, + "grad_norm": 1.4608755297303442, + "learning_rate": 7.905792866366501e-08, + "loss": 0.2495439350605011, + "step": 3895 + }, + { + "epoch": 1.9252440380575806, + "grad_norm": 1.4141233844295813, + "learning_rate": 7.803586896260707e-08, + "loss": 0.25609591603279114, + "step": 3896 + }, + { + "epoch": 1.9257382923514148, + "grad_norm": 1.5334004898395663, + "learning_rate": 7.702043302303397e-08, + "loss": 0.25372135639190674, + "step": 3897 + }, + { + "epoch": 1.926232546645249, + "grad_norm": 1.3368221554281705, + "learning_rate": 7.601162152283904e-08, + "loss": 0.21882784366607666, + "step": 3898 + }, + { + "epoch": 1.926726800939083, + "grad_norm": 1.5284992426615736, + "learning_rate": 7.500943513548797e-08, + "loss": 0.24513296782970428, + "step": 3899 + }, + { + "epoch": 1.9272210552329172, + "grad_norm": 1.3036631509681367, + "learning_rate": 7.401387453002673e-08, + "loss": 0.23508042097091675, + "step": 3900 + }, + { + "epoch": 1.9277153095267514, + "grad_norm": 1.2751462486235168, + "learning_rate": 7.30249403710792e-08, + "loss": 0.2288282811641693, + "step": 3901 + }, + { + "epoch": 1.9282095638205856, + "grad_norm": 1.4342484579443016, + "learning_rate": 7.204263331884175e-08, + "loss": 0.24606133997440338, + "step": 3902 + }, + { + "epoch": 1.9287038181144198, + "grad_norm": 1.3623815600739415, + "learning_rate": 7.10669540290887e-08, + "loss": 0.2710507810115814, + "step": 3903 + }, + { + "epoch": 1.929198072408254, + "grad_norm": 
1.3748292603956795, + "learning_rate": 7.009790315317122e-08, + "loss": 0.27333927154541016, + "step": 3904 + }, + { + "epoch": 1.9296923267020882, + "grad_norm": 1.3028025790213729, + "learning_rate": 6.913548133801074e-08, + "loss": 0.27518531680107117, + "step": 3905 + }, + { + "epoch": 1.9301865809959224, + "grad_norm": 1.3661226136758882, + "learning_rate": 6.817968922610884e-08, + "loss": 0.24289458990097046, + "step": 3906 + }, + { + "epoch": 1.9306808352897566, + "grad_norm": 1.3726485965253954, + "learning_rate": 6.723052745553848e-08, + "loss": 0.225175678730011, + "step": 3907 + }, + { + "epoch": 1.9311750895835909, + "grad_norm": 1.4678815751521954, + "learning_rate": 6.628799665994612e-08, + "loss": 0.2592085599899292, + "step": 3908 + }, + { + "epoch": 1.931669343877425, + "grad_norm": 1.5719300045981148, + "learning_rate": 6.535209746855064e-08, + "loss": 0.2649756968021393, + "step": 3909 + }, + { + "epoch": 1.9321635981712593, + "grad_norm": 1.3380899824561678, + "learning_rate": 6.442283050614673e-08, + "loss": 0.2318311631679535, + "step": 3910 + }, + { + "epoch": 1.9326578524650933, + "grad_norm": 1.6231265342953554, + "learning_rate": 6.350019639309923e-08, + "loss": 0.252924382686615, + "step": 3911 + }, + { + "epoch": 1.9331521067589275, + "grad_norm": 1.1670510769577984, + "learning_rate": 6.258419574534547e-08, + "loss": 0.1903652548789978, + "step": 3912 + }, + { + "epoch": 1.9336463610527617, + "grad_norm": 1.270678601269557, + "learning_rate": 6.167482917439404e-08, + "loss": 0.22795221209526062, + "step": 3913 + }, + { + "epoch": 1.9341406153465959, + "grad_norm": 1.5197318429157889, + "learning_rate": 6.077209728732492e-08, + "loss": 0.26521584391593933, + "step": 3914 + }, + { + "epoch": 1.9346348696404299, + "grad_norm": 1.272486350308544, + "learning_rate": 5.987600068679045e-08, + "loss": 0.22152049839496613, + "step": 3915 + }, + { + "epoch": 1.935129123934264, + "grad_norm": 1.2727416096160045, + "learning_rate": 
5.898653997100989e-08, + "loss": 0.22663083672523499, + "step": 3916 + }, + { + "epoch": 1.9356233782280983, + "grad_norm": 1.3553153320714941, + "learning_rate": 5.8103715733776047e-08, + "loss": 0.23720389604568481, + "step": 3917 + }, + { + "epoch": 1.9361176325219325, + "grad_norm": 1.4063431471110097, + "learning_rate": 5.722752856444858e-08, + "loss": 0.24053935706615448, + "step": 3918 + }, + { + "epoch": 1.9366118868157667, + "grad_norm": 1.3614412415474415, + "learning_rate": 5.635797904795848e-08, + "loss": 0.26565641164779663, + "step": 3919 + }, + { + "epoch": 1.9371061411096009, + "grad_norm": 1.4288462330405298, + "learning_rate": 5.5495067764804736e-08, + "loss": 0.27181264758110046, + "step": 3920 + }, + { + "epoch": 1.937600395403435, + "grad_norm": 1.3077579832623365, + "learning_rate": 5.46387952910532e-08, + "loss": 0.23340710997581482, + "step": 3921 + }, + { + "epoch": 1.9380946496972693, + "grad_norm": 1.2207580518535108, + "learning_rate": 5.378916219833996e-08, + "loss": 0.19458985328674316, + "step": 3922 + }, + { + "epoch": 1.9385889039911035, + "grad_norm": 1.4167428327318625, + "learning_rate": 5.2946169053869066e-08, + "loss": 0.22900202870368958, + "step": 3923 + }, + { + "epoch": 1.9390831582849377, + "grad_norm": 1.528940034628332, + "learning_rate": 5.210981642040924e-08, + "loss": 0.30710160732269287, + "step": 3924 + }, + { + "epoch": 1.939577412578772, + "grad_norm": 1.1725653667546314, + "learning_rate": 5.12801048562972e-08, + "loss": 0.1754809319972992, + "step": 3925 + }, + { + "epoch": 1.940071666872606, + "grad_norm": 1.4924045607844934, + "learning_rate": 5.045703491543763e-08, + "loss": 0.28787121176719666, + "step": 3926 + }, + { + "epoch": 1.94056592116644, + "grad_norm": 1.2741088738360473, + "learning_rate": 4.96406071472999e-08, + "loss": 0.2239963263273239, + "step": 3927 + }, + { + "epoch": 1.9410601754602743, + "grad_norm": 1.3932338575101701, + "learning_rate": 4.883082209692025e-08, + "loss": 
0.2121300995349884, + "step": 3928 + }, + { + "epoch": 1.9415544297541085, + "grad_norm": 1.4253326667240858, + "learning_rate": 4.802768030489735e-08, + "loss": 0.23445773124694824, + "step": 3929 + }, + { + "epoch": 1.9420486840479425, + "grad_norm": 1.3518230097115338, + "learning_rate": 4.7231182307400095e-08, + "loss": 0.2369021326303482, + "step": 3930 + }, + { + "epoch": 1.9425429383417767, + "grad_norm": 1.4790931523959723, + "learning_rate": 4.644132863615758e-08, + "loss": 0.2764047086238861, + "step": 3931 + }, + { + "epoch": 1.943037192635611, + "grad_norm": 1.3688921537083945, + "learning_rate": 4.565811981846468e-08, + "loss": 0.26021280884742737, + "step": 3932 + }, + { + "epoch": 1.9435314469294451, + "grad_norm": 1.4010339478163996, + "learning_rate": 4.488155637718095e-08, + "loss": 0.26012706756591797, + "step": 3933 + }, + { + "epoch": 1.9440257012232793, + "grad_norm": 1.4174814798438116, + "learning_rate": 4.4111638830729444e-08, + "loss": 0.22092604637145996, + "step": 3934 + }, + { + "epoch": 1.9445199555171135, + "grad_norm": 1.3803129729570953, + "learning_rate": 4.334836769309347e-08, + "loss": 0.24200648069381714, + "step": 3935 + }, + { + "epoch": 1.9450142098109477, + "grad_norm": 1.4185191605274636, + "learning_rate": 4.2591743473826554e-08, + "loss": 0.2545608580112457, + "step": 3936 + }, + { + "epoch": 1.945508464104782, + "grad_norm": 1.4140513212071641, + "learning_rate": 4.1841766678036854e-08, + "loss": 0.24908477067947388, + "step": 3937 + }, + { + "epoch": 1.9460027183986162, + "grad_norm": 1.3236421908105307, + "learning_rate": 4.109843780639833e-08, + "loss": 0.23568233847618103, + "step": 3938 + }, + { + "epoch": 1.9464969726924504, + "grad_norm": 1.4613354363975228, + "learning_rate": 4.0361757355147355e-08, + "loss": 0.2230791449546814, + "step": 3939 + }, + { + "epoch": 1.9469912269862846, + "grad_norm": 1.4124924138900457, + "learning_rate": 3.963172581608166e-08, + "loss": 0.2541523277759552, + "step": 3940 + }, + { + 
"epoch": 1.9474854812801188, + "grad_norm": 1.320376312149322, + "learning_rate": 3.8908343676559156e-08, + "loss": 0.2466837763786316, + "step": 3941 + }, + { + "epoch": 1.9479797355739528, + "grad_norm": 1.434845915637092, + "learning_rate": 3.819161141950134e-08, + "loss": 0.2700938880443573, + "step": 3942 + }, + { + "epoch": 1.948473989867787, + "grad_norm": 1.2847188951445323, + "learning_rate": 3.7481529523384355e-08, + "loss": 0.2353779673576355, + "step": 3943 + }, + { + "epoch": 1.9489682441616212, + "grad_norm": 1.4586624394757335, + "learning_rate": 3.677809846225344e-08, + "loss": 0.25708913803100586, + "step": 3944 + }, + { + "epoch": 1.9494624984554554, + "grad_norm": 1.3816118361393621, + "learning_rate": 3.6081318705705195e-08, + "loss": 0.26113903522491455, + "step": 3945 + }, + { + "epoch": 1.9499567527492894, + "grad_norm": 1.353452761923649, + "learning_rate": 3.539119071890307e-08, + "loss": 0.2561355531215668, + "step": 3946 + }, + { + "epoch": 1.9504510070431236, + "grad_norm": 1.3679281740557483, + "learning_rate": 3.470771496256409e-08, + "loss": 0.24893885850906372, + "step": 3947 + }, + { + "epoch": 1.9509452613369578, + "grad_norm": 1.3725754281016815, + "learning_rate": 3.403089189296771e-08, + "loss": 0.25399699807167053, + "step": 3948 + }, + { + "epoch": 1.951439515630792, + "grad_norm": 1.4355008801200986, + "learning_rate": 3.3360721961952505e-08, + "loss": 0.2820609509944916, + "step": 3949 + }, + { + "epoch": 1.9519337699246262, + "grad_norm": 1.222474026880474, + "learning_rate": 3.269720561691281e-08, + "loss": 0.22128066420555115, + "step": 3950 + }, + { + "epoch": 1.9524280242184604, + "grad_norm": 1.2322544942302993, + "learning_rate": 3.204034330080319e-08, + "loss": 0.2132534235715866, + "step": 3951 + }, + { + "epoch": 1.9529222785122946, + "grad_norm": 1.3290638165306805, + "learning_rate": 3.1390135452135095e-08, + "loss": 0.2308463454246521, + "step": 3952 + }, + { + "epoch": 1.9534165328061288, + "grad_norm": 
1.443895071093895, + "learning_rate": 3.074658250497908e-08, + "loss": 0.2756718397140503, + "step": 3953 + }, + { + "epoch": 1.953910787099963, + "grad_norm": 1.404895730578394, + "learning_rate": 3.010968488896149e-08, + "loss": 0.24619412422180176, + "step": 3954 + }, + { + "epoch": 1.9544050413937972, + "grad_norm": 1.3236097287021305, + "learning_rate": 2.9479443029265532e-08, + "loss": 0.2164454162120819, + "step": 3955 + }, + { + "epoch": 1.9548992956876314, + "grad_norm": 1.4498814795200483, + "learning_rate": 2.8855857346632432e-08, + "loss": 0.2778991460800171, + "step": 3956 + }, + { + "epoch": 1.9553935499814654, + "grad_norm": 1.3781229461817452, + "learning_rate": 2.8238928257359188e-08, + "loss": 0.22639301419258118, + "step": 3957 + }, + { + "epoch": 1.9558878042752996, + "grad_norm": 1.3946690868287814, + "learning_rate": 2.7628656173297463e-08, + "loss": 0.2367630898952484, + "step": 3958 + }, + { + "epoch": 1.9563820585691338, + "grad_norm": 1.233715623675162, + "learning_rate": 2.702504150185692e-08, + "loss": 0.2400333285331726, + "step": 3959 + }, + { + "epoch": 1.956876312862968, + "grad_norm": 1.5197718598123784, + "learning_rate": 2.6428084646001884e-08, + "loss": 0.2384340763092041, + "step": 3960 + }, + { + "epoch": 1.957370567156802, + "grad_norm": 1.2187445449938668, + "learning_rate": 2.5837786004253572e-08, + "loss": 0.20191673934459686, + "step": 3961 + }, + { + "epoch": 1.9578648214506362, + "grad_norm": 1.3530782559852856, + "learning_rate": 2.525414597068565e-08, + "loss": 0.24700434505939484, + "step": 3962 + }, + { + "epoch": 1.9583590757444704, + "grad_norm": 1.2586448244620927, + "learning_rate": 2.4677164934928665e-08, + "loss": 0.20032359659671783, + "step": 3963 + }, + { + "epoch": 1.9588533300383046, + "grad_norm": 1.5212375132060378, + "learning_rate": 2.4106843282165615e-08, + "loss": 0.280154287815094, + "step": 3964 + }, + { + "epoch": 1.9593475843321388, + "grad_norm": 1.46590896106962, + "learning_rate": 
2.3543181393135274e-08, + "loss": 0.25518566370010376, + "step": 3965 + }, + { + "epoch": 1.959841838625973, + "grad_norm": 1.2792917112791735, + "learning_rate": 2.298617964413108e-08, + "loss": 0.2246837019920349, + "step": 3966 + }, + { + "epoch": 1.9603360929198073, + "grad_norm": 1.3954375167289552, + "learning_rate": 2.2435838407000034e-08, + "loss": 0.23355916142463684, + "step": 3967 + }, + { + "epoch": 1.9608303472136415, + "grad_norm": 1.3615561015896285, + "learning_rate": 2.1892158049140467e-08, + "loss": 0.2449415922164917, + "step": 3968 + }, + { + "epoch": 1.9613246015074757, + "grad_norm": 1.279518283780108, + "learning_rate": 2.1355138933507602e-08, + "loss": 0.2269652783870697, + "step": 3969 + }, + { + "epoch": 1.9618188558013099, + "grad_norm": 1.4090731883758925, + "learning_rate": 2.0824781418605776e-08, + "loss": 0.26923638582229614, + "step": 3970 + }, + { + "epoch": 1.962313110095144, + "grad_norm": 1.3838329777907195, + "learning_rate": 2.0301085858493996e-08, + "loss": 0.2631189823150635, + "step": 3971 + }, + { + "epoch": 1.9628073643889783, + "grad_norm": 1.272147209216066, + "learning_rate": 1.978405260278593e-08, + "loss": 0.23281526565551758, + "step": 3972 + }, + { + "epoch": 1.9633016186828123, + "grad_norm": 1.4504818525258278, + "learning_rate": 1.9273681996644365e-08, + "loss": 0.26399385929107666, + "step": 3973 + }, + { + "epoch": 1.9637958729766465, + "grad_norm": 1.42867652212037, + "learning_rate": 1.876997438078454e-08, + "loss": 0.2641673684120178, + "step": 3974 + }, + { + "epoch": 1.9642901272704807, + "grad_norm": 1.2944638856965318, + "learning_rate": 1.8272930091476347e-08, + "loss": 0.22440402209758759, + "step": 3975 + }, + { + "epoch": 1.9647843815643147, + "grad_norm": 1.3585170311291963, + "learning_rate": 1.778254946053881e-08, + "loss": 0.2552195191383362, + "step": 3976 + }, + { + "epoch": 1.9652786358581489, + "grad_norm": 1.3475063805104281, + "learning_rate": 1.729883281534117e-08, + "loss": 
0.24455100297927856, + "step": 3977 + }, + { + "epoch": 1.965772890151983, + "grad_norm": 1.4925946223112605, + "learning_rate": 1.6821780478808448e-08, + "loss": 0.2324603945016861, + "step": 3978 + }, + { + "epoch": 1.9662671444458173, + "grad_norm": 1.295320797137711, + "learning_rate": 1.6351392769412556e-08, + "loss": 0.25488242506980896, + "step": 3979 + }, + { + "epoch": 1.9667613987396515, + "grad_norm": 1.2382372998222446, + "learning_rate": 1.5887670001177856e-08, + "loss": 0.23511120676994324, + "step": 3980 + }, + { + "epoch": 1.9672556530334857, + "grad_norm": 1.474081111410746, + "learning_rate": 1.5430612483680052e-08, + "loss": 0.2683457136154175, + "step": 3981 + }, + { + "epoch": 1.96774990732732, + "grad_norm": 1.3558148882952648, + "learning_rate": 1.4980220522041734e-08, + "loss": 0.26627787947654724, + "step": 3982 + }, + { + "epoch": 1.9682441616211541, + "grad_norm": 1.3779286197554192, + "learning_rate": 1.4536494416940162e-08, + "loss": 0.22931841015815735, + "step": 3983 + }, + { + "epoch": 1.9687384159149883, + "grad_norm": 1.3860885624616435, + "learning_rate": 1.4099434464600603e-08, + "loss": 0.22918352484703064, + "step": 3984 + }, + { + "epoch": 1.9692326702088225, + "grad_norm": 1.498852903518302, + "learning_rate": 1.3669040956797442e-08, + "loss": 0.2542854845523834, + "step": 3985 + }, + { + "epoch": 1.9697269245026567, + "grad_norm": 1.278204707841908, + "learning_rate": 1.3245314180854175e-08, + "loss": 0.21581681072711945, + "step": 3986 + }, + { + "epoch": 1.970221178796491, + "grad_norm": 1.4943144749429917, + "learning_rate": 1.2828254419646746e-08, + "loss": 0.2708613872528076, + "step": 3987 + }, + { + "epoch": 1.970715433090325, + "grad_norm": 1.3377756042264306, + "learning_rate": 1.2417861951597998e-08, + "loss": 0.25348716974258423, + "step": 3988 + }, + { + "epoch": 1.9712096873841591, + "grad_norm": 1.396109244896111, + "learning_rate": 1.2014137050677665e-08, + "loss": 0.24585089087486267, + "step": 3989 + }, + { 
+ "epoch": 1.9717039416779933, + "grad_norm": 1.3367696007925745, + "learning_rate": 1.1617079986410152e-08, + "loss": 0.26362112164497375, + "step": 3990 + }, + { + "epoch": 1.9721981959718276, + "grad_norm": 1.815729582105598, + "learning_rate": 1.1226691023862312e-08, + "loss": 0.23288659751415253, + "step": 3991 + }, + { + "epoch": 1.9726924502656615, + "grad_norm": 1.2305688182670602, + "learning_rate": 1.0842970423654563e-08, + "loss": 0.21604478359222412, + "step": 3992 + }, + { + "epoch": 1.9731867045594957, + "grad_norm": 1.353706093653017, + "learning_rate": 1.0465918441950885e-08, + "loss": 0.21149985492229462, + "step": 3993 + }, + { + "epoch": 1.97368095885333, + "grad_norm": 1.40480632228099, + "learning_rate": 1.0095535330467698e-08, + "loss": 0.26392504572868347, + "step": 3994 + }, + { + "epoch": 1.9741752131471642, + "grad_norm": 1.246030064073758, + "learning_rate": 9.731821336466107e-09, + "loss": 0.22993823885917664, + "step": 3995 + }, + { + "epoch": 1.9746694674409984, + "grad_norm": 1.247780565740116, + "learning_rate": 9.374776702757438e-09, + "loss": 0.2207789570093155, + "step": 3996 + }, + { + "epoch": 1.9751637217348326, + "grad_norm": 1.3761642168404886, + "learning_rate": 9.024401667698802e-09, + "loss": 0.27149268984794617, + "step": 3997 + }, + { + "epoch": 1.9756579760286668, + "grad_norm": 1.4326020240148696, + "learning_rate": 8.680696465196425e-09, + "loss": 0.269406795501709, + "step": 3998 + }, + { + "epoch": 1.976152230322501, + "grad_norm": 1.4639077922370294, + "learning_rate": 8.343661324703434e-09, + "loss": 0.25354713201522827, + "step": 3999 + }, + { + "epoch": 1.9766464846163352, + "grad_norm": 1.3417205540337154, + "learning_rate": 8.013296471217624e-09, + "loss": 0.22957751154899597, + "step": 4000 + } + ], + "logging_steps": 1, + "max_steps": 4048, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + 
"should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3030468003299328.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dfe3e09693106b888d9a74120f900fc466890d4c --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0dfb10ba35de856be3ab9b2b044348b5752efc43fb83f0d6e71a782894a3001 +size 6968 diff --git a/checkpoint-4000/zero_to_fp32.py b/checkpoint-4000/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-4000/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-4048/README.md b/checkpoint-4048/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-4048/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-4048/adapter_config.json b/checkpoint-4048/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6cf16f3266f5592ee03447db73cafc0bd600786e --- /dev/null +++ b/checkpoint-4048/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.25.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.13.mlp.down_proj", + "layers.20.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.7.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.15.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.27.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.4.mlp.up_proj", + "layers.19.mlp.up_proj", + "layers.11.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.15.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.3.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.6.mlp.up_proj", + "o_proj", + "layers.24.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.18.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.8.mlp.gate_proj", + "layers.13.mlp.gate_proj", + "layers.11.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.7.mlp.gate_proj", + "layers.17.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.20.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.6.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.5.mlp.up_proj", + "layers.18.mlp.gate_proj", + "k_proj", + "layers.9.mlp.gate_proj", + "layers.23.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "v_proj", + "layers.6.mlp.down_proj", + "q_proj", + "layers.27.mlp.gate_proj", + "layers.9.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.23.mlp.up_proj", + 
"layers.17.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.0.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.16.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.1.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.24.mlp.up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-4048/adapter_model.safetensors b/checkpoint-4048/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90b38b84542e94f6452ac08ffb8255ea56ac05c6 --- /dev/null +++ b/checkpoint-4048/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6852ab96cd7ce0b54e64dd7759da8510a4a13074411e19fe8276f4d698378316 +size 323020440 diff --git a/checkpoint-4048/chat_template.jinja b/checkpoint-4048/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-4048/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set 
video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-4048/global_step4048/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-4048/global_step4048/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..852e3005fb2276c459b28fe4c62a72073f265762 --- /dev/null +++ b/checkpoint-4048/global_step4048/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3f508e887e0f6752756b614a2d4ee9f6bbfe834a5d413718dd8d61a6439cbb +size 1937772272 diff --git a/checkpoint-4048/global_step4048/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-4048/global_step4048/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9093281760ec6068a1fdd2bbbe9ddcacb25a3468 --- /dev/null +++ b/checkpoint-4048/global_step4048/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4f698a8ffc81cf199273d95ff195b9cb8c457e68ef7a445e70540904371799 +size 460630 diff --git a/checkpoint-4048/latest b/checkpoint-4048/latest new file mode 100644 index 0000000000000000000000000000000000000000..2381ea6b25985bbc7c8478c7de9a8bf68e00ad4e --- /dev/null +++ b/checkpoint-4048/latest @@ -0,0 +1 @@ +global_step4048 \ No newline at end of file diff --git a/checkpoint-4048/processor_config.json b/checkpoint-4048/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-4048/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-4048/rng_state.pth b/checkpoint-4048/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..177c8424d12a36669452128bcb13c83fd58374f1 --- /dev/null +++ b/checkpoint-4048/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93de4fae510d688cd693bf4dba7085d32c468ba17a2eefa8f89ec70a5dcc620 +size 14244 diff --git a/checkpoint-4048/scheduler.pt b/checkpoint-4048/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f8fdb95d849b386c8832415c6cad9ff115d71d6 --- /dev/null +++ b/checkpoint-4048/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de4450b489ae8229a5dee698e7caf71d4f83704579607b6b853d76f8d879cba +size 
1000 diff --git a/checkpoint-4048/tokenizer.json b/checkpoint-4048/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-4048/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-4048/tokenizer_config.json b/checkpoint-4048/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-4048/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-4048/trainer_state.json b/checkpoint-4048/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b2047d78c7f8d348434415ce8e066726359d7caa --- /dev/null +++ b/checkpoint-4048/trainer_state.json @@ -0,0 +1,28370 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 4048, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0004942542938341777, + "grad_norm": 1.7827389996067007, + 
"learning_rate": 0.0, + "loss": 1.1816105842590332, + "step": 1 + }, + { + "epoch": 0.0009885085876683553, + "grad_norm": 1.891128580111598, + "learning_rate": 9.852216748768474e-08, + "loss": 1.1496102809906006, + "step": 2 + }, + { + "epoch": 0.001482762881502533, + "grad_norm": 1.8581340535316004, + "learning_rate": 1.9704433497536947e-07, + "loss": 1.1515967845916748, + "step": 3 + }, + { + "epoch": 0.0019770171753367106, + "grad_norm": 1.708604556953044, + "learning_rate": 2.955665024630542e-07, + "loss": 1.1795943975448608, + "step": 4 + }, + { + "epoch": 0.0024712714691708885, + "grad_norm": 1.8513528590958555, + "learning_rate": 3.9408866995073894e-07, + "loss": 1.2289564609527588, + "step": 5 + }, + { + "epoch": 0.002965525763005066, + "grad_norm": 1.972324289049384, + "learning_rate": 4.926108374384237e-07, + "loss": 1.179269790649414, + "step": 6 + }, + { + "epoch": 0.003459780056839244, + "grad_norm": 1.8334156798400192, + "learning_rate": 5.911330049261084e-07, + "loss": 1.199608564376831, + "step": 7 + }, + { + "epoch": 0.003954034350673421, + "grad_norm": 1.6669436389627912, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1643707752227783, + "step": 8 + }, + { + "epoch": 0.004448288644507599, + "grad_norm": 1.8750060934609654, + "learning_rate": 7.881773399014779e-07, + "loss": 1.1264240741729736, + "step": 9 + }, + { + "epoch": 0.004942542938341777, + "grad_norm": 1.9962482953672744, + "learning_rate": 8.866995073891626e-07, + "loss": 1.1717555522918701, + "step": 10 + }, + { + "epoch": 0.005436797232175955, + "grad_norm": 1.895693583554434, + "learning_rate": 9.852216748768474e-07, + "loss": 1.1856712102890015, + "step": 11 + }, + { + "epoch": 0.005931051526010132, + "grad_norm": 1.7765248738469863, + "learning_rate": 1.0837438423645322e-06, + "loss": 1.1258785724639893, + "step": 12 + }, + { + "epoch": 0.00642530581984431, + "grad_norm": 1.8326605479421993, + "learning_rate": 1.1822660098522167e-06, + "loss": 1.1333656311035156, + "step": 13 
+ }, + { + "epoch": 0.006919560113678488, + "grad_norm": 1.9142537067819894, + "learning_rate": 1.2807881773399017e-06, + "loss": 1.2281363010406494, + "step": 14 + }, + { + "epoch": 0.0074138144075126654, + "grad_norm": 1.9232318367357442, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1910676956176758, + "step": 15 + }, + { + "epoch": 0.007908068701346842, + "grad_norm": 2.5599273269087885, + "learning_rate": 1.4778325123152712e-06, + "loss": 1.2124552726745605, + "step": 16 + }, + { + "epoch": 0.008402322995181021, + "grad_norm": 2.2109761155287133, + "learning_rate": 1.5763546798029558e-06, + "loss": 1.1993463039398193, + "step": 17 + }, + { + "epoch": 0.008896577289015198, + "grad_norm": 2.1999117305307077, + "learning_rate": 1.6748768472906405e-06, + "loss": 1.1245683431625366, + "step": 18 + }, + { + "epoch": 0.009390831582849375, + "grad_norm": 2.203478389299074, + "learning_rate": 1.7733990147783253e-06, + "loss": 1.1838568449020386, + "step": 19 + }, + { + "epoch": 0.009885085876683554, + "grad_norm": 2.419107047950166, + "learning_rate": 1.8719211822660098e-06, + "loss": 1.081169843673706, + "step": 20 + }, + { + "epoch": 0.010379340170517731, + "grad_norm": 2.559921706815215, + "learning_rate": 1.970443349753695e-06, + "loss": 1.1506569385528564, + "step": 21 + }, + { + "epoch": 0.01087359446435191, + "grad_norm": 2.8697838151244977, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0841327905654907, + "step": 22 + }, + { + "epoch": 0.011367848758186087, + "grad_norm": 2.8012936510978905, + "learning_rate": 2.1674876847290643e-06, + "loss": 1.1335525512695312, + "step": 23 + }, + { + "epoch": 0.011862103052020264, + "grad_norm": 2.649521736906966, + "learning_rate": 2.266009852216749e-06, + "loss": 1.035188913345337, + "step": 24 + }, + { + "epoch": 0.012356357345854442, + "grad_norm": 2.7385314170591166, + "learning_rate": 2.3645320197044334e-06, + "loss": 1.0640877485275269, + "step": 25 + }, + { + "epoch": 0.01285061163968862, + 
"grad_norm": 2.5011806151261755, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.0479273796081543, + "step": 26 + }, + { + "epoch": 0.013344865933522798, + "grad_norm": 2.236670838822209, + "learning_rate": 2.5615763546798034e-06, + "loss": 1.0522505044937134, + "step": 27 + }, + { + "epoch": 0.013839120227356975, + "grad_norm": 2.065544668093392, + "learning_rate": 2.660098522167488e-06, + "loss": 1.080836296081543, + "step": 28 + }, + { + "epoch": 0.014333374521191152, + "grad_norm": 1.7478242928012908, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9712544679641724, + "step": 29 + }, + { + "epoch": 0.014827628815025331, + "grad_norm": 1.5930614486695707, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.0469061136245728, + "step": 30 + }, + { + "epoch": 0.015321883108859508, + "grad_norm": 1.380137621152324, + "learning_rate": 2.9556650246305424e-06, + "loss": 0.9911116361618042, + "step": 31 + }, + { + "epoch": 0.015816137402693685, + "grad_norm": 1.3167918112915387, + "learning_rate": 3.054187192118227e-06, + "loss": 0.9552959203720093, + "step": 32 + }, + { + "epoch": 0.016310391696527864, + "grad_norm": 1.2266567383194062, + "learning_rate": 3.1527093596059115e-06, + "loss": 0.957429051399231, + "step": 33 + }, + { + "epoch": 0.016804645990362042, + "grad_norm": 1.305011449405004, + "learning_rate": 3.2512315270935963e-06, + "loss": 1.0180628299713135, + "step": 34 + }, + { + "epoch": 0.017298900284196218, + "grad_norm": 1.2347397961596738, + "learning_rate": 3.349753694581281e-06, + "loss": 0.9064415097236633, + "step": 35 + }, + { + "epoch": 0.017793154578030396, + "grad_norm": 1.216758814553776, + "learning_rate": 3.448275862068966e-06, + "loss": 0.9718184471130371, + "step": 36 + }, + { + "epoch": 0.018287408871864575, + "grad_norm": 1.065779121444896, + "learning_rate": 3.5467980295566506e-06, + "loss": 0.8831444978713989, + "step": 37 + }, + { + "epoch": 0.01878166316569875, + "grad_norm": 1.0132491929086573, + "learning_rate": 
3.6453201970443354e-06, + "loss": 0.9167139530181885, + "step": 38 + }, + { + "epoch": 0.01927591745953293, + "grad_norm": 1.0431186403983612, + "learning_rate": 3.7438423645320197e-06, + "loss": 0.9322037696838379, + "step": 39 + }, + { + "epoch": 0.019770171753367108, + "grad_norm": 1.0319066435292568, + "learning_rate": 3.842364532019705e-06, + "loss": 0.9189817905426025, + "step": 40 + }, + { + "epoch": 0.020264426047201287, + "grad_norm": 1.1670657884595383, + "learning_rate": 3.94088669950739e-06, + "loss": 0.8480448126792908, + "step": 41 + }, + { + "epoch": 0.020758680341035462, + "grad_norm": 0.9850175889441174, + "learning_rate": 4.039408866995074e-06, + "loss": 0.8907301425933838, + "step": 42 + }, + { + "epoch": 0.02125293463486964, + "grad_norm": 1.0028387912933743, + "learning_rate": 4.137931034482759e-06, + "loss": 0.8674390316009521, + "step": 43 + }, + { + "epoch": 0.02174718892870382, + "grad_norm": 0.9822966394815191, + "learning_rate": 4.236453201970444e-06, + "loss": 0.8674882054328918, + "step": 44 + }, + { + "epoch": 0.022241443222537995, + "grad_norm": 0.9778327665239519, + "learning_rate": 4.334975369458129e-06, + "loss": 0.8542560338973999, + "step": 45 + }, + { + "epoch": 0.022735697516372173, + "grad_norm": 0.8621828386281931, + "learning_rate": 4.4334975369458135e-06, + "loss": 0.772778332233429, + "step": 46 + }, + { + "epoch": 0.023229951810206352, + "grad_norm": 0.8638093364937629, + "learning_rate": 4.532019704433498e-06, + "loss": 0.7481152415275574, + "step": 47 + }, + { + "epoch": 0.023724206104040527, + "grad_norm": 0.8467972866728939, + "learning_rate": 4.630541871921182e-06, + "loss": 0.8373709917068481, + "step": 48 + }, + { + "epoch": 0.024218460397874706, + "grad_norm": 0.8165134857986008, + "learning_rate": 4.729064039408867e-06, + "loss": 0.8163385391235352, + "step": 49 + }, + { + "epoch": 0.024712714691708885, + "grad_norm": 0.833026336683437, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.7444975972175598, + 
"step": 50 + }, + { + "epoch": 0.025206968985543064, + "grad_norm": 0.858591041664589, + "learning_rate": 4.926108374384237e-06, + "loss": 0.7683243751525879, + "step": 51 + }, + { + "epoch": 0.02570122327937724, + "grad_norm": 1.0127725906591662, + "learning_rate": 5.024630541871922e-06, + "loss": 0.806761622428894, + "step": 52 + }, + { + "epoch": 0.026195477573211418, + "grad_norm": 0.8333649125881921, + "learning_rate": 5.123152709359607e-06, + "loss": 0.7312102913856506, + "step": 53 + }, + { + "epoch": 0.026689731867045596, + "grad_norm": 0.9425883709792775, + "learning_rate": 5.2216748768472915e-06, + "loss": 0.7351999282836914, + "step": 54 + }, + { + "epoch": 0.02718398616087977, + "grad_norm": 0.9039627787948463, + "learning_rate": 5.320197044334976e-06, + "loss": 0.7453763484954834, + "step": 55 + }, + { + "epoch": 0.02767824045471395, + "grad_norm": 0.9324665454088699, + "learning_rate": 5.41871921182266e-06, + "loss": 0.7063292860984802, + "step": 56 + }, + { + "epoch": 0.02817249474854813, + "grad_norm": 0.8343256198457882, + "learning_rate": 5.517241379310345e-06, + "loss": 0.7145994901657104, + "step": 57 + }, + { + "epoch": 0.028666749042382304, + "grad_norm": 0.7157092163314197, + "learning_rate": 5.61576354679803e-06, + "loss": 0.687594473361969, + "step": 58 + }, + { + "epoch": 0.029161003336216483, + "grad_norm": 0.7603582128739335, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.6643895506858826, + "step": 59 + }, + { + "epoch": 0.029655257630050662, + "grad_norm": 0.6925073694472516, + "learning_rate": 5.812807881773399e-06, + "loss": 0.6781614422798157, + "step": 60 + }, + { + "epoch": 0.030149511923884837, + "grad_norm": 0.7169709854131228, + "learning_rate": 5.911330049261085e-06, + "loss": 0.6209158301353455, + "step": 61 + }, + { + "epoch": 0.030643766217719016, + "grad_norm": 0.6749920715098945, + "learning_rate": 6.00985221674877e-06, + "loss": 0.6424679756164551, + "step": 62 + }, + { + "epoch": 0.031138020511553195, + 
"grad_norm": 0.6435584468821339, + "learning_rate": 6.108374384236454e-06, + "loss": 0.6745971441268921, + "step": 63 + }, + { + "epoch": 0.03163227480538737, + "grad_norm": 0.657544191989632, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6520330905914307, + "step": 64 + }, + { + "epoch": 0.03212652909922155, + "grad_norm": 0.6351335823908374, + "learning_rate": 6.305418719211823e-06, + "loss": 0.6790571212768555, + "step": 65 + }, + { + "epoch": 0.03262078339305573, + "grad_norm": 0.6484215339353426, + "learning_rate": 6.403940886699508e-06, + "loss": 0.6491506099700928, + "step": 66 + }, + { + "epoch": 0.033115037686889906, + "grad_norm": 0.617685895397393, + "learning_rate": 6.502463054187193e-06, + "loss": 0.6347313523292542, + "step": 67 + }, + { + "epoch": 0.033609291980724085, + "grad_norm": 0.6638567270691007, + "learning_rate": 6.600985221674877e-06, + "loss": 0.6785881519317627, + "step": 68 + }, + { + "epoch": 0.034103546274558263, + "grad_norm": 0.6459369268846485, + "learning_rate": 6.699507389162562e-06, + "loss": 0.6470085978507996, + "step": 69 + }, + { + "epoch": 0.034597800568392435, + "grad_norm": 0.6364523697931875, + "learning_rate": 6.798029556650246e-06, + "loss": 0.6205961108207703, + "step": 70 + }, + { + "epoch": 0.035092054862226614, + "grad_norm": 0.6434045969551643, + "learning_rate": 6.896551724137932e-06, + "loss": 0.6621580123901367, + "step": 71 + }, + { + "epoch": 0.03558630915606079, + "grad_norm": 0.6281362500041567, + "learning_rate": 6.995073891625616e-06, + "loss": 0.6363088488578796, + "step": 72 + }, + { + "epoch": 0.03608056344989497, + "grad_norm": 0.6023389614758552, + "learning_rate": 7.093596059113301e-06, + "loss": 0.6073004007339478, + "step": 73 + }, + { + "epoch": 0.03657481774372915, + "grad_norm": 0.5962790573618366, + "learning_rate": 7.192118226600986e-06, + "loss": 0.6490880846977234, + "step": 74 + }, + { + "epoch": 0.03706907203756333, + "grad_norm": 0.6425224117743127, + "learning_rate": 
7.290640394088671e-06, + "loss": 0.6540624499320984, + "step": 75 + }, + { + "epoch": 0.0375633263313975, + "grad_norm": 0.6885040620745063, + "learning_rate": 7.3891625615763555e-06, + "loss": 0.6237976551055908, + "step": 76 + }, + { + "epoch": 0.03805758062523168, + "grad_norm": 0.6110947192931153, + "learning_rate": 7.487684729064039e-06, + "loss": 0.6121219992637634, + "step": 77 + }, + { + "epoch": 0.03855183491906586, + "grad_norm": 0.6031847840211293, + "learning_rate": 7.586206896551724e-06, + "loss": 0.5785888433456421, + "step": 78 + }, + { + "epoch": 0.03904608921290004, + "grad_norm": 0.645073431050071, + "learning_rate": 7.68472906403941e-06, + "loss": 0.6144810914993286, + "step": 79 + }, + { + "epoch": 0.039540343506734216, + "grad_norm": 0.709404375816405, + "learning_rate": 7.783251231527095e-06, + "loss": 0.6522500514984131, + "step": 80 + }, + { + "epoch": 0.040034597800568394, + "grad_norm": 0.6784602446095636, + "learning_rate": 7.88177339901478e-06, + "loss": 0.6126501560211182, + "step": 81 + }, + { + "epoch": 0.04052885209440257, + "grad_norm": 0.6834338295248128, + "learning_rate": 7.980295566502464e-06, + "loss": 0.573388934135437, + "step": 82 + }, + { + "epoch": 0.041023106388236745, + "grad_norm": 0.7128627750045655, + "learning_rate": 8.078817733990149e-06, + "loss": 0.6462322473526001, + "step": 83 + }, + { + "epoch": 0.041517360682070924, + "grad_norm": 0.6985575396830678, + "learning_rate": 8.177339901477834e-06, + "loss": 0.6542905569076538, + "step": 84 + }, + { + "epoch": 0.0420116149759051, + "grad_norm": 0.6800738258763197, + "learning_rate": 8.275862068965518e-06, + "loss": 0.6539976000785828, + "step": 85 + }, + { + "epoch": 0.04250586926973928, + "grad_norm": 0.6805451756514653, + "learning_rate": 8.374384236453203e-06, + "loss": 0.6303049325942993, + "step": 86 + }, + { + "epoch": 0.04300012356357346, + "grad_norm": 0.6262637687675628, + "learning_rate": 8.472906403940888e-06, + "loss": 0.5727078318595886, + "step": 87 + 
}, + { + "epoch": 0.04349437785740764, + "grad_norm": 0.6392194157453778, + "learning_rate": 8.571428571428571e-06, + "loss": 0.6204914450645447, + "step": 88 + }, + { + "epoch": 0.04398863215124181, + "grad_norm": 0.8144620373591464, + "learning_rate": 8.669950738916257e-06, + "loss": 0.633359432220459, + "step": 89 + }, + { + "epoch": 0.04448288644507599, + "grad_norm": 0.6564252660453104, + "learning_rate": 8.768472906403942e-06, + "loss": 0.5737719535827637, + "step": 90 + }, + { + "epoch": 0.04497714073891017, + "grad_norm": 0.704224097621618, + "learning_rate": 8.866995073891627e-06, + "loss": 0.6438707709312439, + "step": 91 + }, + { + "epoch": 0.04547139503274435, + "grad_norm": 0.7123681566966987, + "learning_rate": 8.965517241379312e-06, + "loss": 0.6284823417663574, + "step": 92 + }, + { + "epoch": 0.045965649326578525, + "grad_norm": 0.6879682376399587, + "learning_rate": 9.064039408866996e-06, + "loss": 0.6442058086395264, + "step": 93 + }, + { + "epoch": 0.046459903620412704, + "grad_norm": 0.709934515039082, + "learning_rate": 9.162561576354681e-06, + "loss": 0.5821751356124878, + "step": 94 + }, + { + "epoch": 0.04695415791424688, + "grad_norm": 1.530236961676562, + "learning_rate": 9.261083743842364e-06, + "loss": 0.546042263507843, + "step": 95 + }, + { + "epoch": 0.047448412208081055, + "grad_norm": 0.6844457378175872, + "learning_rate": 9.359605911330049e-06, + "loss": 0.5743244886398315, + "step": 96 + }, + { + "epoch": 0.04794266650191523, + "grad_norm": 0.6876016450255833, + "learning_rate": 9.458128078817734e-06, + "loss": 0.5775831341743469, + "step": 97 + }, + { + "epoch": 0.04843692079574941, + "grad_norm": 0.6367125491834975, + "learning_rate": 9.55665024630542e-06, + "loss": 0.5632016658782959, + "step": 98 + }, + { + "epoch": 0.04893117508958359, + "grad_norm": 0.635357516984843, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5817564129829407, + "step": 99 + }, + { + "epoch": 0.04942542938341777, + "grad_norm": 
0.6380730461382318, + "learning_rate": 9.75369458128079e-06, + "loss": 0.5692225098609924, + "step": 100 + }, + { + "epoch": 0.04991968367725195, + "grad_norm": 0.6016319910280624, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5239434242248535, + "step": 101 + }, + { + "epoch": 0.05041393797108613, + "grad_norm": 0.6757811368400487, + "learning_rate": 9.95073891625616e-06, + "loss": 0.543138861656189, + "step": 102 + }, + { + "epoch": 0.0509081922649203, + "grad_norm": 0.6907500926239555, + "learning_rate": 1.0049261083743844e-05, + "loss": 0.5914052128791809, + "step": 103 + }, + { + "epoch": 0.05140244655875448, + "grad_norm": 0.657964391130701, + "learning_rate": 1.0147783251231529e-05, + "loss": 0.5394442081451416, + "step": 104 + }, + { + "epoch": 0.051896700852588656, + "grad_norm": 0.6411875370567456, + "learning_rate": 1.0246305418719214e-05, + "loss": 0.6157902479171753, + "step": 105 + }, + { + "epoch": 0.052390955146422835, + "grad_norm": 0.738818036033501, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5863415598869324, + "step": 106 + }, + { + "epoch": 0.052885209440257014, + "grad_norm": 0.7066380161278255, + "learning_rate": 1.0443349753694583e-05, + "loss": 0.5783145427703857, + "step": 107 + }, + { + "epoch": 0.05337946373409119, + "grad_norm": 0.6486663261886427, + "learning_rate": 1.0541871921182268e-05, + "loss": 0.5761469006538391, + "step": 108 + }, + { + "epoch": 0.053873718027925364, + "grad_norm": 0.7011826885785277, + "learning_rate": 1.0640394088669953e-05, + "loss": 0.5931205749511719, + "step": 109 + }, + { + "epoch": 0.05436797232175954, + "grad_norm": 0.6624296231637669, + "learning_rate": 1.0738916256157637e-05, + "loss": 0.5429986119270325, + "step": 110 + }, + { + "epoch": 0.05486222661559372, + "grad_norm": 0.758180242025479, + "learning_rate": 1.083743842364532e-05, + "loss": 0.5154455304145813, + "step": 111 + }, + { + "epoch": 0.0553564809094279, + "grad_norm": 0.6631694030017043, + "learning_rate": 
1.0935960591133005e-05, + "loss": 0.5465028285980225, + "step": 112 + }, + { + "epoch": 0.05585073520326208, + "grad_norm": 0.7234030186547562, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5973349213600159, + "step": 113 + }, + { + "epoch": 0.05634498949709626, + "grad_norm": 0.8062494007312124, + "learning_rate": 1.1133004926108375e-05, + "loss": 0.6201578378677368, + "step": 114 + }, + { + "epoch": 0.05683924379093044, + "grad_norm": 0.7754913697435033, + "learning_rate": 1.123152709359606e-05, + "loss": 0.5090143084526062, + "step": 115 + }, + { + "epoch": 0.05733349808476461, + "grad_norm": 0.7128751966577052, + "learning_rate": 1.1330049261083744e-05, + "loss": 0.5275869369506836, + "step": 116 + }, + { + "epoch": 0.05782775237859879, + "grad_norm": 0.6950533949454222, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.571302056312561, + "step": 117 + }, + { + "epoch": 0.058322006672432966, + "grad_norm": 0.727683614551879, + "learning_rate": 1.1527093596059114e-05, + "loss": 0.5920293927192688, + "step": 118 + }, + { + "epoch": 0.058816260966267145, + "grad_norm": 0.7151674344713859, + "learning_rate": 1.1625615763546799e-05, + "loss": 0.5877068042755127, + "step": 119 + }, + { + "epoch": 0.059310515260101324, + "grad_norm": 0.7467125629300125, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.6140042543411255, + "step": 120 + }, + { + "epoch": 0.0598047695539355, + "grad_norm": 0.7531213899377466, + "learning_rate": 1.182266009852217e-05, + "loss": 0.5642052292823792, + "step": 121 + }, + { + "epoch": 0.060299023847769674, + "grad_norm": 0.7258097143889621, + "learning_rate": 1.1921182266009855e-05, + "loss": 0.5535261034965515, + "step": 122 + }, + { + "epoch": 0.06079327814160385, + "grad_norm": 0.6906824437380253, + "learning_rate": 1.201970443349754e-05, + "loss": 0.5202849507331848, + "step": 123 + }, + { + "epoch": 0.06128753243543803, + "grad_norm": 0.7290752273219125, + "learning_rate": 1.2118226600985224e-05, + "loss": 
0.5626791715621948, + "step": 124 + }, + { + "epoch": 0.06178178672927221, + "grad_norm": 0.6770400510110369, + "learning_rate": 1.2216748768472909e-05, + "loss": 0.5416101217269897, + "step": 125 + }, + { + "epoch": 0.06227604102310639, + "grad_norm": 0.730080694043851, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.5683388710021973, + "step": 126 + }, + { + "epoch": 0.06277029531694056, + "grad_norm": 0.7617011668537459, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.564468264579773, + "step": 127 + }, + { + "epoch": 0.06326454961077474, + "grad_norm": 0.7085057216007719, + "learning_rate": 1.2512315270935961e-05, + "loss": 0.5419844388961792, + "step": 128 + }, + { + "epoch": 0.06375880390460892, + "grad_norm": 0.7653624040034734, + "learning_rate": 1.2610837438423646e-05, + "loss": 0.51283860206604, + "step": 129 + }, + { + "epoch": 0.0642530581984431, + "grad_norm": 0.8138449595397697, + "learning_rate": 1.2709359605911331e-05, + "loss": 0.5807296633720398, + "step": 130 + }, + { + "epoch": 0.06474731249227728, + "grad_norm": 0.6723079879875923, + "learning_rate": 1.2807881773399016e-05, + "loss": 0.5277815461158752, + "step": 131 + }, + { + "epoch": 0.06524156678611145, + "grad_norm": 0.6681532618442926, + "learning_rate": 1.29064039408867e-05, + "loss": 0.5044680833816528, + "step": 132 + }, + { + "epoch": 0.06573582107994563, + "grad_norm": 0.753382083900827, + "learning_rate": 1.3004926108374385e-05, + "loss": 0.5412886738777161, + "step": 133 + }, + { + "epoch": 0.06623007537377981, + "grad_norm": 0.7168767227212489, + "learning_rate": 1.310344827586207e-05, + "loss": 0.5314532518386841, + "step": 134 + }, + { + "epoch": 0.06672432966761399, + "grad_norm": 0.8393067756176276, + "learning_rate": 1.3201970443349755e-05, + "loss": 0.5544138550758362, + "step": 135 + }, + { + "epoch": 0.06721858396144817, + "grad_norm": 0.7720251101355328, + "learning_rate": 1.330049261083744e-05, + "loss": 0.5745705366134644, + "step": 136 + }, + { + "epoch": 
0.06771283825528235, + "grad_norm": 0.8433611027798503, + "learning_rate": 1.3399014778325124e-05, + "loss": 0.5361800789833069, + "step": 137 + }, + { + "epoch": 0.06820709254911653, + "grad_norm": 0.7945865329579561, + "learning_rate": 1.3497536945812807e-05, + "loss": 0.5878221392631531, + "step": 138 + }, + { + "epoch": 0.06870134684295069, + "grad_norm": 0.7847520309491554, + "learning_rate": 1.3596059113300492e-05, + "loss": 0.5952787399291992, + "step": 139 + }, + { + "epoch": 0.06919560113678487, + "grad_norm": 0.7556944357281568, + "learning_rate": 1.369458128078818e-05, + "loss": 0.5334340929985046, + "step": 140 + }, + { + "epoch": 0.06968985543061905, + "grad_norm": 0.7730405260844581, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.5297533273696899, + "step": 141 + }, + { + "epoch": 0.07018410972445323, + "grad_norm": 0.7838373123609123, + "learning_rate": 1.3891625615763548e-05, + "loss": 0.5388105511665344, + "step": 142 + }, + { + "epoch": 0.0706783640182874, + "grad_norm": 0.6827867428906486, + "learning_rate": 1.3990147783251233e-05, + "loss": 0.484375536441803, + "step": 143 + }, + { + "epoch": 0.07117261831212159, + "grad_norm": 0.7377838543831393, + "learning_rate": 1.4088669950738918e-05, + "loss": 0.5395358800888062, + "step": 144 + }, + { + "epoch": 0.07166687260595576, + "grad_norm": 0.7024037339686016, + "learning_rate": 1.4187192118226602e-05, + "loss": 0.501459538936615, + "step": 145 + }, + { + "epoch": 0.07216112689978994, + "grad_norm": 0.7544878056630825, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.5390491485595703, + "step": 146 + }, + { + "epoch": 0.07265538119362412, + "grad_norm": 0.7358581376182646, + "learning_rate": 1.4384236453201972e-05, + "loss": 0.505649745464325, + "step": 147 + }, + { + "epoch": 0.0731496354874583, + "grad_norm": 0.791834759029257, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.5155121684074402, + "step": 148 + }, + { + "epoch": 0.07364388978129248, + "grad_norm": 
0.9182625859668322, + "learning_rate": 1.4581280788177341e-05, + "loss": 0.5502114295959473, + "step": 149 + }, + { + "epoch": 0.07413814407512666, + "grad_norm": 0.7705513444985356, + "learning_rate": 1.4679802955665026e-05, + "loss": 0.5243497490882874, + "step": 150 + }, + { + "epoch": 0.07463239836896084, + "grad_norm": 0.7936247647794451, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.529721736907959, + "step": 151 + }, + { + "epoch": 0.075126652662795, + "grad_norm": 0.7493387955752852, + "learning_rate": 1.4876847290640396e-05, + "loss": 0.4721008241176605, + "step": 152 + }, + { + "epoch": 0.07562090695662918, + "grad_norm": 0.8448372107109295, + "learning_rate": 1.4975369458128079e-05, + "loss": 0.46029576659202576, + "step": 153 + }, + { + "epoch": 0.07611516125046336, + "grad_norm": 0.8666504632745452, + "learning_rate": 1.5073891625615764e-05, + "loss": 0.5151746273040771, + "step": 154 + }, + { + "epoch": 0.07660941554429754, + "grad_norm": 0.8234378506914858, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.4743254780769348, + "step": 155 + }, + { + "epoch": 0.07710366983813172, + "grad_norm": 0.7901189046711773, + "learning_rate": 1.5270935960591133e-05, + "loss": 0.5167561769485474, + "step": 156 + }, + { + "epoch": 0.0775979241319659, + "grad_norm": 0.7442599788530032, + "learning_rate": 1.536945812807882e-05, + "loss": 0.47482365369796753, + "step": 157 + }, + { + "epoch": 0.07809217842580007, + "grad_norm": 0.7472930500337165, + "learning_rate": 1.5467980295566506e-05, + "loss": 0.5088409781455994, + "step": 158 + }, + { + "epoch": 0.07858643271963425, + "grad_norm": 0.839637174922739, + "learning_rate": 1.556650246305419e-05, + "loss": 0.5264201164245605, + "step": 159 + }, + { + "epoch": 0.07908068701346843, + "grad_norm": 0.8043048232381864, + "learning_rate": 1.5665024630541875e-05, + "loss": 0.5475984811782837, + "step": 160 + }, + { + "epoch": 0.07957494130730261, + "grad_norm": 0.813963733997232, + "learning_rate": 
1.576354679802956e-05, + "loss": 0.5652282238006592, + "step": 161 + }, + { + "epoch": 0.08006919560113679, + "grad_norm": 0.8257458665080726, + "learning_rate": 1.586206896551724e-05, + "loss": 0.5179979801177979, + "step": 162 + }, + { + "epoch": 0.08056344989497097, + "grad_norm": 0.7453513460678786, + "learning_rate": 1.5960591133004928e-05, + "loss": 0.4966253638267517, + "step": 163 + }, + { + "epoch": 0.08105770418880515, + "grad_norm": 0.7400908854625781, + "learning_rate": 1.605911330049261e-05, + "loss": 0.5216315388679504, + "step": 164 + }, + { + "epoch": 0.08155195848263931, + "grad_norm": 0.7974617542166776, + "learning_rate": 1.6157635467980298e-05, + "loss": 0.495576411485672, + "step": 165 + }, + { + "epoch": 0.08204621277647349, + "grad_norm": 0.7828217496299378, + "learning_rate": 1.625615763546798e-05, + "loss": 0.5101697444915771, + "step": 166 + }, + { + "epoch": 0.08254046707030767, + "grad_norm": 0.7891722656265441, + "learning_rate": 1.6354679802955667e-05, + "loss": 0.5438036918640137, + "step": 167 + }, + { + "epoch": 0.08303472136414185, + "grad_norm": 0.8062908900423786, + "learning_rate": 1.645320197044335e-05, + "loss": 0.5043500661849976, + "step": 168 + }, + { + "epoch": 0.08352897565797603, + "grad_norm": 0.8893145421032131, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.5129355788230896, + "step": 169 + }, + { + "epoch": 0.0840232299518102, + "grad_norm": 0.8344265538652059, + "learning_rate": 1.665024630541872e-05, + "loss": 0.48643916845321655, + "step": 170 + }, + { + "epoch": 0.08451748424564438, + "grad_norm": 0.9138503767586129, + "learning_rate": 1.6748768472906406e-05, + "loss": 0.5300272703170776, + "step": 171 + }, + { + "epoch": 0.08501173853947856, + "grad_norm": 0.9819214205489949, + "learning_rate": 1.684729064039409e-05, + "loss": 0.5321004390716553, + "step": 172 + }, + { + "epoch": 0.08550599283331274, + "grad_norm": 0.9555025734347583, + "learning_rate": 1.6945812807881776e-05, + "loss": 
0.5066401958465576, + "step": 173 + }, + { + "epoch": 0.08600024712714692, + "grad_norm": 0.8139597552129452, + "learning_rate": 1.704433497536946e-05, + "loss": 0.48993563652038574, + "step": 174 + }, + { + "epoch": 0.0864945014209811, + "grad_norm": 0.8921248257221488, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.5468013882637024, + "step": 175 + }, + { + "epoch": 0.08698875571481528, + "grad_norm": 0.8277628260630481, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5081865191459656, + "step": 176 + }, + { + "epoch": 0.08748301000864946, + "grad_norm": 0.7727605442624492, + "learning_rate": 1.7339901477832515e-05, + "loss": 0.48374873399734497, + "step": 177 + }, + { + "epoch": 0.08797726430248362, + "grad_norm": 0.7716185332367417, + "learning_rate": 1.7438423645320198e-05, + "loss": 0.4929465651512146, + "step": 178 + }, + { + "epoch": 0.0884715185963178, + "grad_norm": 0.7369259534742475, + "learning_rate": 1.7536945812807884e-05, + "loss": 0.49666428565979004, + "step": 179 + }, + { + "epoch": 0.08896577289015198, + "grad_norm": 0.9095846029993176, + "learning_rate": 1.7635467980295567e-05, + "loss": 0.5705476403236389, + "step": 180 + }, + { + "epoch": 0.08946002718398616, + "grad_norm": 0.8153458294604309, + "learning_rate": 1.7733990147783254e-05, + "loss": 0.5466605424880981, + "step": 181 + }, + { + "epoch": 0.08995428147782034, + "grad_norm": 0.7908211366510465, + "learning_rate": 1.7832512315270937e-05, + "loss": 0.47837337851524353, + "step": 182 + }, + { + "epoch": 0.09044853577165451, + "grad_norm": 0.8050205335034676, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.5370041131973267, + "step": 183 + }, + { + "epoch": 0.0909427900654887, + "grad_norm": 0.8315453873696782, + "learning_rate": 1.8029556650246306e-05, + "loss": 0.540340006351471, + "step": 184 + }, + { + "epoch": 0.09143704435932287, + "grad_norm": 0.7864886396514408, + "learning_rate": 1.8128078817733993e-05, + "loss": 0.5165396928787231, + "step": 185 + }, + { + 
"epoch": 0.09193129865315705, + "grad_norm": 1.0212742677335798, + "learning_rate": 1.8226600985221676e-05, + "loss": 0.5391616821289062, + "step": 186 + }, + { + "epoch": 0.09242555294699123, + "grad_norm": 0.8362655612683817, + "learning_rate": 1.8325123152709362e-05, + "loss": 0.472774475812912, + "step": 187 + }, + { + "epoch": 0.09291980724082541, + "grad_norm": 0.7994913228950927, + "learning_rate": 1.8423645320197045e-05, + "loss": 0.5079161524772644, + "step": 188 + }, + { + "epoch": 0.09341406153465959, + "grad_norm": 0.7908069143027292, + "learning_rate": 1.852216748768473e-05, + "loss": 0.4909520149230957, + "step": 189 + }, + { + "epoch": 0.09390831582849377, + "grad_norm": 0.8204263481704893, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.5214540362358093, + "step": 190 + }, + { + "epoch": 0.09440257012232794, + "grad_norm": 1.0097310819423937, + "learning_rate": 1.8719211822660098e-05, + "loss": 0.4820341467857361, + "step": 191 + }, + { + "epoch": 0.09489682441616211, + "grad_norm": 0.7986122947719724, + "learning_rate": 1.8817733990147784e-05, + "loss": 0.5094855427742004, + "step": 192 + }, + { + "epoch": 0.09539107870999629, + "grad_norm": 0.8104059351445748, + "learning_rate": 1.8916256157635468e-05, + "loss": 0.47840312123298645, + "step": 193 + }, + { + "epoch": 0.09588533300383047, + "grad_norm": 0.8556791067143968, + "learning_rate": 1.9014778325123154e-05, + "loss": 0.5368070602416992, + "step": 194 + }, + { + "epoch": 0.09637958729766465, + "grad_norm": 0.8413108625552047, + "learning_rate": 1.911330049261084e-05, + "loss": 0.493880033493042, + "step": 195 + }, + { + "epoch": 0.09687384159149882, + "grad_norm": 0.8344269563446816, + "learning_rate": 1.9211822660098524e-05, + "loss": 0.5052261352539062, + "step": 196 + }, + { + "epoch": 0.097368095885333, + "grad_norm": 0.8488100596559239, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4817495346069336, + "step": 197 + }, + { + "epoch": 0.09786235017916718, + "grad_norm": 
0.8835550004433761, + "learning_rate": 1.9408866995073893e-05, + "loss": 0.530259370803833, + "step": 198 + }, + { + "epoch": 0.09835660447300136, + "grad_norm": 0.8634602606490965, + "learning_rate": 1.950738916256158e-05, + "loss": 0.4984540045261383, + "step": 199 + }, + { + "epoch": 0.09885085876683554, + "grad_norm": 0.8655848178642821, + "learning_rate": 1.9605911330049263e-05, + "loss": 0.5472708940505981, + "step": 200 + }, + { + "epoch": 0.09934511306066972, + "grad_norm": 0.8520053240792014, + "learning_rate": 1.970443349753695e-05, + "loss": 0.5394926071166992, + "step": 201 + }, + { + "epoch": 0.0998393673545039, + "grad_norm": 0.9089636816290306, + "learning_rate": 1.9802955665024632e-05, + "loss": 0.5299160480499268, + "step": 202 + }, + { + "epoch": 0.10033362164833808, + "grad_norm": 0.9396000630272938, + "learning_rate": 1.990147783251232e-05, + "loss": 0.506400465965271, + "step": 203 + }, + { + "epoch": 0.10082787594217225, + "grad_norm": 0.7711226267847403, + "learning_rate": 2e-05, + "loss": 0.47956231236457825, + "step": 204 + }, + { + "epoch": 0.10132213023600642, + "grad_norm": 0.8971065288988803, + "learning_rate": 1.9999996662071442e-05, + "loss": 0.48805660009384155, + "step": 205 + }, + { + "epoch": 0.1018163845298406, + "grad_norm": 0.8419748393313904, + "learning_rate": 1.9999986648287996e-05, + "loss": 0.46014025807380676, + "step": 206 + }, + { + "epoch": 0.10231063882367478, + "grad_norm": 0.8667704651728929, + "learning_rate": 1.9999969958656345e-05, + "loss": 0.4654610753059387, + "step": 207 + }, + { + "epoch": 0.10280489311750896, + "grad_norm": 0.8143872307343123, + "learning_rate": 1.999994659318763e-05, + "loss": 0.47037336230278015, + "step": 208 + }, + { + "epoch": 0.10329914741134313, + "grad_norm": 0.8238466130965688, + "learning_rate": 1.999991655189745e-05, + "loss": 0.4853154718875885, + "step": 209 + }, + { + "epoch": 0.10379340170517731, + "grad_norm": 0.8233043672230826, + "learning_rate": 1.9999879834805865e-05, + 
"loss": 0.4918109178543091, + "step": 210 + }, + { + "epoch": 0.10428765599901149, + "grad_norm": 0.787297319281164, + "learning_rate": 1.999983644193738e-05, + "loss": 0.5136955380439758, + "step": 211 + }, + { + "epoch": 0.10478191029284567, + "grad_norm": 0.8895124065919626, + "learning_rate": 1.9999786373320972e-05, + "loss": 0.5145115852355957, + "step": 212 + }, + { + "epoch": 0.10527616458667985, + "grad_norm": 0.8153315460424436, + "learning_rate": 1.9999729628990058e-05, + "loss": 0.4624764025211334, + "step": 213 + }, + { + "epoch": 0.10577041888051403, + "grad_norm": 0.7949012412003572, + "learning_rate": 1.9999666208982518e-05, + "loss": 0.4599718749523163, + "step": 214 + }, + { + "epoch": 0.1062646731743482, + "grad_norm": 0.8110981138692489, + "learning_rate": 1.99995961133407e-05, + "loss": 0.4642864465713501, + "step": 215 + }, + { + "epoch": 0.10675892746818239, + "grad_norm": 0.7785663242974379, + "learning_rate": 1.9999519342111392e-05, + "loss": 0.4756677448749542, + "step": 216 + }, + { + "epoch": 0.10725318176201656, + "grad_norm": 0.8781173550322721, + "learning_rate": 1.9999435895345846e-05, + "loss": 0.4982803463935852, + "step": 217 + }, + { + "epoch": 0.10774743605585073, + "grad_norm": 0.905677346569408, + "learning_rate": 1.999934577309977e-05, + "loss": 0.5189295411109924, + "step": 218 + }, + { + "epoch": 0.10824169034968491, + "grad_norm": 0.8870093356565885, + "learning_rate": 1.999924897543333e-05, + "loss": 0.5077873468399048, + "step": 219 + }, + { + "epoch": 0.10873594464351909, + "grad_norm": 0.9164316488089079, + "learning_rate": 1.9999145502411148e-05, + "loss": 0.5510451793670654, + "step": 220 + }, + { + "epoch": 0.10923019893735327, + "grad_norm": 0.850640343977404, + "learning_rate": 1.9999035354102298e-05, + "loss": 0.44604551792144775, + "step": 221 + }, + { + "epoch": 0.10972445323118744, + "grad_norm": 0.7739778425864705, + "learning_rate": 1.9998918530580315e-05, + "loss": 0.42567160725593567, + "step": 222 + }, + { 
+ "epoch": 0.11021870752502162, + "grad_norm": 0.8699648367810445, + "learning_rate": 1.9998795031923186e-05, + "loss": 0.4622190594673157, + "step": 223 + }, + { + "epoch": 0.1107129618188558, + "grad_norm": 0.8261943707290175, + "learning_rate": 1.999866485821336e-05, + "loss": 0.5023611783981323, + "step": 224 + }, + { + "epoch": 0.11120721611268998, + "grad_norm": 0.769204860463621, + "learning_rate": 1.9998528009537735e-05, + "loss": 0.451701819896698, + "step": 225 + }, + { + "epoch": 0.11170147040652416, + "grad_norm": 0.9053438794448195, + "learning_rate": 1.9998384485987675e-05, + "loss": 0.48493725061416626, + "step": 226 + }, + { + "epoch": 0.11219572470035834, + "grad_norm": 0.7780216873284675, + "learning_rate": 1.9998234287658996e-05, + "loss": 0.45377853512763977, + "step": 227 + }, + { + "epoch": 0.11268997899419252, + "grad_norm": 0.9129521331875277, + "learning_rate": 1.9998077414651957e-05, + "loss": 0.48963701725006104, + "step": 228 + }, + { + "epoch": 0.1131842332880267, + "grad_norm": 0.8500208947168179, + "learning_rate": 1.9997913867071296e-05, + "loss": 0.47935402393341064, + "step": 229 + }, + { + "epoch": 0.11367848758186087, + "grad_norm": 0.8984825507205957, + "learning_rate": 1.999774364502619e-05, + "loss": 0.46203523874282837, + "step": 230 + }, + { + "epoch": 0.11417274187569504, + "grad_norm": 0.8695917880315948, + "learning_rate": 1.9997566748630274e-05, + "loss": 0.4411412179470062, + "step": 231 + }, + { + "epoch": 0.11466699616952922, + "grad_norm": 0.9063292151670944, + "learning_rate": 1.9997383178001646e-05, + "loss": 0.44424787163734436, + "step": 232 + }, + { + "epoch": 0.1151612504633634, + "grad_norm": 0.9239108187837685, + "learning_rate": 1.9997192933262853e-05, + "loss": 0.4862042963504791, + "step": 233 + }, + { + "epoch": 0.11565550475719757, + "grad_norm": 0.9583721120887143, + "learning_rate": 1.99969960145409e-05, + "loss": 0.49599340558052063, + "step": 234 + }, + { + "epoch": 0.11614975905103175, + 
"grad_norm": 0.8373453660412895, + "learning_rate": 1.999679242196725e-05, + "loss": 0.49702027440071106, + "step": 235 + }, + { + "epoch": 0.11664401334486593, + "grad_norm": 0.9122480348696357, + "learning_rate": 1.9996582155677813e-05, + "loss": 0.520037829875946, + "step": 236 + }, + { + "epoch": 0.11713826763870011, + "grad_norm": 0.8765545420336399, + "learning_rate": 1.999636521581296e-05, + "loss": 0.4571160674095154, + "step": 237 + }, + { + "epoch": 0.11763252193253429, + "grad_norm": 0.8739431997449725, + "learning_rate": 1.9996141602517526e-05, + "loss": 0.45602840185165405, + "step": 238 + }, + { + "epoch": 0.11812677622636847, + "grad_norm": 0.8737753030098584, + "learning_rate": 1.999591131594078e-05, + "loss": 0.4909728169441223, + "step": 239 + }, + { + "epoch": 0.11862103052020265, + "grad_norm": 0.9637438681008479, + "learning_rate": 1.9995674356236468e-05, + "loss": 0.47716090083122253, + "step": 240 + }, + { + "epoch": 0.11911528481403683, + "grad_norm": 0.8781513787464966, + "learning_rate": 1.9995430723562774e-05, + "loss": 0.4449527859687805, + "step": 241 + }, + { + "epoch": 0.119609539107871, + "grad_norm": 0.9278951723441426, + "learning_rate": 1.9995180418082347e-05, + "loss": 0.49069035053253174, + "step": 242 + }, + { + "epoch": 0.12010379340170518, + "grad_norm": 0.8082383806465664, + "learning_rate": 1.9994923439962286e-05, + "loss": 0.506738543510437, + "step": 243 + }, + { + "epoch": 0.12059804769553935, + "grad_norm": 0.7256243644120642, + "learning_rate": 1.9994659789374145e-05, + "loss": 0.38516658544540405, + "step": 244 + }, + { + "epoch": 0.12109230198937353, + "grad_norm": 1.120005864402108, + "learning_rate": 1.9994389466493942e-05, + "loss": 0.49539780616760254, + "step": 245 + }, + { + "epoch": 0.1215865562832077, + "grad_norm": 0.8099291045850996, + "learning_rate": 1.999411247150213e-05, + "loss": 0.4400706887245178, + "step": 246 + }, + { + "epoch": 0.12208081057704188, + "grad_norm": 1.033732324753182, + 
"learning_rate": 1.9993828804583625e-05, + "loss": 0.48815736174583435, + "step": 247 + }, + { + "epoch": 0.12257506487087606, + "grad_norm": 0.8506340248073136, + "learning_rate": 1.999353846592781e-05, + "loss": 0.42744773626327515, + "step": 248 + }, + { + "epoch": 0.12306931916471024, + "grad_norm": 0.8847437809130215, + "learning_rate": 1.9993241455728505e-05, + "loss": 0.4370969235897064, + "step": 249 + }, + { + "epoch": 0.12356357345854442, + "grad_norm": 0.8643380888364789, + "learning_rate": 1.9992937774183988e-05, + "loss": 0.4803960621356964, + "step": 250 + }, + { + "epoch": 0.1240578277523786, + "grad_norm": 0.8986867692232635, + "learning_rate": 1.9992627421496994e-05, + "loss": 0.4614640474319458, + "step": 251 + }, + { + "epoch": 0.12455208204621278, + "grad_norm": 0.819634526245566, + "learning_rate": 1.9992310397874715e-05, + "loss": 0.46626490354537964, + "step": 252 + }, + { + "epoch": 0.12504633634004694, + "grad_norm": 0.8614062439986471, + "learning_rate": 1.9991986703528784e-05, + "loss": 0.4812886416912079, + "step": 253 + }, + { + "epoch": 0.12554059063388112, + "grad_norm": 0.782352455662906, + "learning_rate": 1.99916563386753e-05, + "loss": 0.45037686824798584, + "step": 254 + }, + { + "epoch": 0.1260348449277153, + "grad_norm": 0.8735972282090627, + "learning_rate": 1.9991319303534804e-05, + "loss": 0.48492124676704407, + "step": 255 + }, + { + "epoch": 0.12652909922154948, + "grad_norm": 0.9123971905878313, + "learning_rate": 1.9990975598332304e-05, + "loss": 0.48825496435165405, + "step": 256 + }, + { + "epoch": 0.12702335351538366, + "grad_norm": 0.9350748088966393, + "learning_rate": 1.9990625223297244e-05, + "loss": 0.4836634695529938, + "step": 257 + }, + { + "epoch": 0.12751760780921784, + "grad_norm": 0.8091067369882244, + "learning_rate": 1.9990268178663538e-05, + "loss": 0.4632943272590637, + "step": 258 + }, + { + "epoch": 0.12801186210305202, + "grad_norm": 0.8933963237824735, + "learning_rate": 1.9989904464669533e-05, + 
"loss": 0.4601137042045593, + "step": 259 + }, + { + "epoch": 0.1285061163968862, + "grad_norm": 0.956219889400008, + "learning_rate": 1.998953408155805e-05, + "loss": 0.4390139579772949, + "step": 260 + }, + { + "epoch": 0.12900037069072037, + "grad_norm": 0.8209256250218969, + "learning_rate": 1.9989157029576348e-05, + "loss": 0.45749080181121826, + "step": 261 + }, + { + "epoch": 0.12949462498455455, + "grad_norm": 0.8687280720196128, + "learning_rate": 1.998877330897614e-05, + "loss": 0.4490616023540497, + "step": 262 + }, + { + "epoch": 0.12998887927838873, + "grad_norm": 0.8048623785766325, + "learning_rate": 1.998838292001359e-05, + "loss": 0.4819987714290619, + "step": 263 + }, + { + "epoch": 0.1304831335722229, + "grad_norm": 0.8512266303867803, + "learning_rate": 1.9987985862949325e-05, + "loss": 0.4448384940624237, + "step": 264 + }, + { + "epoch": 0.1309773878660571, + "grad_norm": 0.8699526878628875, + "learning_rate": 1.9987582138048405e-05, + "loss": 0.4574149549007416, + "step": 265 + }, + { + "epoch": 0.13147164215989127, + "grad_norm": 0.8239086741829158, + "learning_rate": 1.9987171745580353e-05, + "loss": 0.4765186607837677, + "step": 266 + }, + { + "epoch": 0.13196589645372545, + "grad_norm": 0.8859727328667625, + "learning_rate": 1.998675468581915e-05, + "loss": 0.4900081753730774, + "step": 267 + }, + { + "epoch": 0.13246015074755962, + "grad_norm": 0.8200731674424109, + "learning_rate": 1.9986330959043206e-05, + "loss": 0.433933287858963, + "step": 268 + }, + { + "epoch": 0.1329544050413938, + "grad_norm": 0.8424887851968712, + "learning_rate": 1.9985900565535403e-05, + "loss": 0.452491819858551, + "step": 269 + }, + { + "epoch": 0.13344865933522798, + "grad_norm": 0.8454499255279871, + "learning_rate": 1.9985463505583062e-05, + "loss": 0.4583294987678528, + "step": 270 + }, + { + "epoch": 0.13394291362906216, + "grad_norm": 0.7993545503780815, + "learning_rate": 1.9985019779477958e-05, + "loss": 0.43183961510658264, + "step": 271 + }, + { + 
"epoch": 0.13443716792289634, + "grad_norm": 0.8548370246393396, + "learning_rate": 1.998456938751632e-05, + "loss": 0.48075324296951294, + "step": 272 + }, + { + "epoch": 0.13493142221673052, + "grad_norm": 0.9002412472414919, + "learning_rate": 1.9984112329998825e-05, + "loss": 0.5131007432937622, + "step": 273 + }, + { + "epoch": 0.1354256765105647, + "grad_norm": 0.9730858409317547, + "learning_rate": 1.998364860723059e-05, + "loss": 0.4841446876525879, + "step": 274 + }, + { + "epoch": 0.13591993080439888, + "grad_norm": 0.845168898875427, + "learning_rate": 1.9983178219521194e-05, + "loss": 0.5001078248023987, + "step": 275 + }, + { + "epoch": 0.13641418509823305, + "grad_norm": 0.9216453803321015, + "learning_rate": 1.998270116718466e-05, + "loss": 0.44851893186569214, + "step": 276 + }, + { + "epoch": 0.1369084393920672, + "grad_norm": 0.8496437780068066, + "learning_rate": 1.9982217450539464e-05, + "loss": 0.4635714888572693, + "step": 277 + }, + { + "epoch": 0.13740269368590138, + "grad_norm": 0.8697167139912243, + "learning_rate": 1.9981727069908525e-05, + "loss": 0.4171838164329529, + "step": 278 + }, + { + "epoch": 0.13789694797973556, + "grad_norm": 0.9173222191020198, + "learning_rate": 1.9981230025619216e-05, + "loss": 0.4819942116737366, + "step": 279 + }, + { + "epoch": 0.13839120227356974, + "grad_norm": 0.965585018194969, + "learning_rate": 1.998072631800336e-05, + "loss": 0.47878971695899963, + "step": 280 + }, + { + "epoch": 0.13888545656740392, + "grad_norm": 0.8354999533998939, + "learning_rate": 1.9980215947397217e-05, + "loss": 0.4436519145965576, + "step": 281 + }, + { + "epoch": 0.1393797108612381, + "grad_norm": 0.9615471937507843, + "learning_rate": 1.9979698914141507e-05, + "loss": 0.4633050262928009, + "step": 282 + }, + { + "epoch": 0.13987396515507228, + "grad_norm": 0.8419828093645744, + "learning_rate": 1.9979175218581397e-05, + "loss": 0.4264826774597168, + "step": 283 + }, + { + "epoch": 0.14036821944890646, + "grad_norm": 
0.9397240311894202, + "learning_rate": 1.9978644861066493e-05, + "loss": 0.47763916850090027, + "step": 284 + }, + { + "epoch": 0.14086247374274063, + "grad_norm": 0.9621046785661004, + "learning_rate": 1.997810784195086e-05, + "loss": 0.44895434379577637, + "step": 285 + }, + { + "epoch": 0.1413567280365748, + "grad_norm": 0.9045420673708359, + "learning_rate": 1.9977564161593e-05, + "loss": 0.4287600517272949, + "step": 286 + }, + { + "epoch": 0.141850982330409, + "grad_norm": 0.9070406248365095, + "learning_rate": 1.997701382035587e-05, + "loss": 0.44175297021865845, + "step": 287 + }, + { + "epoch": 0.14234523662424317, + "grad_norm": 0.9409958894859969, + "learning_rate": 1.9976456818606868e-05, + "loss": 0.4393232464790344, + "step": 288 + }, + { + "epoch": 0.14283949091807735, + "grad_norm": 0.9574764348211552, + "learning_rate": 1.9975893156717836e-05, + "loss": 0.4600023329257965, + "step": 289 + }, + { + "epoch": 0.14333374521191153, + "grad_norm": 0.9582932704552442, + "learning_rate": 1.9975322835065075e-05, + "loss": 0.4819300174713135, + "step": 290 + }, + { + "epoch": 0.1438279995057457, + "grad_norm": 0.8798665685233671, + "learning_rate": 1.9974745854029318e-05, + "loss": 0.4391498267650604, + "step": 291 + }, + { + "epoch": 0.14432225379957989, + "grad_norm": 0.8278978827145046, + "learning_rate": 1.9974162213995748e-05, + "loss": 0.43435904383659363, + "step": 292 + }, + { + "epoch": 0.14481650809341406, + "grad_norm": 0.8555919001416697, + "learning_rate": 1.9973571915354e-05, + "loss": 0.43575727939605713, + "step": 293 + }, + { + "epoch": 0.14531076238724824, + "grad_norm": 0.847472972308698, + "learning_rate": 1.9972974958498145e-05, + "loss": 0.39998459815979004, + "step": 294 + }, + { + "epoch": 0.14580501668108242, + "grad_norm": 0.9068432330089449, + "learning_rate": 1.9972371343826705e-05, + "loss": 0.4620361030101776, + "step": 295 + }, + { + "epoch": 0.1462992709749166, + "grad_norm": 0.9496965104492539, + "learning_rate": 
1.9971761071742644e-05, + "loss": 0.5172264575958252, + "step": 296 + }, + { + "epoch": 0.14679352526875078, + "grad_norm": 0.9234160870013586, + "learning_rate": 1.997114414265337e-05, + "loss": 0.4685489535331726, + "step": 297 + }, + { + "epoch": 0.14728777956258496, + "grad_norm": 0.8830728533856737, + "learning_rate": 1.9970520556970735e-05, + "loss": 0.4346499741077423, + "step": 298 + }, + { + "epoch": 0.14778203385641914, + "grad_norm": 0.8462127222831192, + "learning_rate": 1.996989031511104e-05, + "loss": 0.4051141142845154, + "step": 299 + }, + { + "epoch": 0.14827628815025332, + "grad_norm": 1.5751283315817302, + "learning_rate": 1.996925341749502e-05, + "loss": 0.4862591028213501, + "step": 300 + }, + { + "epoch": 0.1487705424440875, + "grad_norm": 0.9475006076143342, + "learning_rate": 1.996860986454787e-05, + "loss": 0.44075754284858704, + "step": 301 + }, + { + "epoch": 0.14926479673792167, + "grad_norm": 0.8707373783945862, + "learning_rate": 1.99679596566992e-05, + "loss": 0.44321805238723755, + "step": 302 + }, + { + "epoch": 0.14975905103175585, + "grad_norm": 0.8195768056986794, + "learning_rate": 1.996730279438309e-05, + "loss": 0.4468157887458801, + "step": 303 + }, + { + "epoch": 0.15025330532559, + "grad_norm": 0.9918503423974457, + "learning_rate": 1.996663927803805e-05, + "loss": 0.48698270320892334, + "step": 304 + }, + { + "epoch": 0.15074755961942418, + "grad_norm": 0.9116215117394889, + "learning_rate": 1.9965969108107032e-05, + "loss": 0.41898253560066223, + "step": 305 + }, + { + "epoch": 0.15124181391325836, + "grad_norm": 0.9221438157249551, + "learning_rate": 1.9965292285037437e-05, + "loss": 0.4827130436897278, + "step": 306 + }, + { + "epoch": 0.15173606820709254, + "grad_norm": 0.8314057300557679, + "learning_rate": 1.99646088092811e-05, + "loss": 0.4219037592411041, + "step": 307 + }, + { + "epoch": 0.15223032250092672, + "grad_norm": 0.8392045773293594, + "learning_rate": 1.9963918681294298e-05, + "loss": 0.4431123733520508, 
+ "step": 308 + }, + { + "epoch": 0.1527245767947609, + "grad_norm": 0.8500815118931239, + "learning_rate": 1.996322190153775e-05, + "loss": 0.4161941409111023, + "step": 309 + }, + { + "epoch": 0.15321883108859508, + "grad_norm": 0.9107651666369411, + "learning_rate": 1.9962518470476617e-05, + "loss": 0.4774768650531769, + "step": 310 + }, + { + "epoch": 0.15371308538242925, + "grad_norm": 0.8037347887475985, + "learning_rate": 1.9961808388580503e-05, + "loss": 0.4196036159992218, + "step": 311 + }, + { + "epoch": 0.15420733967626343, + "grad_norm": 1.0067362464519019, + "learning_rate": 1.996109165632344e-05, + "loss": 0.44241398572921753, + "step": 312 + }, + { + "epoch": 0.1547015939700976, + "grad_norm": 0.888150506782497, + "learning_rate": 1.996036827418392e-05, + "loss": 0.47662627696990967, + "step": 313 + }, + { + "epoch": 0.1551958482639318, + "grad_norm": 0.8458159023673953, + "learning_rate": 1.9959638242644855e-05, + "loss": 0.4241487979888916, + "step": 314 + }, + { + "epoch": 0.15569010255776597, + "grad_norm": 0.9355978957071136, + "learning_rate": 1.9958901562193605e-05, + "loss": 0.45686113834381104, + "step": 315 + }, + { + "epoch": 0.15618435685160015, + "grad_norm": 0.944155507976385, + "learning_rate": 1.9958158233321968e-05, + "loss": 0.4154825806617737, + "step": 316 + }, + { + "epoch": 0.15667861114543433, + "grad_norm": 0.9827195710672626, + "learning_rate": 1.9957408256526176e-05, + "loss": 0.4705435037612915, + "step": 317 + }, + { + "epoch": 0.1571728654392685, + "grad_norm": 0.9880074034620054, + "learning_rate": 1.9956651632306908e-05, + "loss": 0.4367898404598236, + "step": 318 + }, + { + "epoch": 0.15766711973310268, + "grad_norm": 0.9294773909083144, + "learning_rate": 1.9955888361169272e-05, + "loss": 0.4668901264667511, + "step": 319 + }, + { + "epoch": 0.15816137402693686, + "grad_norm": 0.9543525396859661, + "learning_rate": 1.995511844362282e-05, + "loss": 0.46429356932640076, + "step": 320 + }, + { + "epoch": 
0.15865562832077104, + "grad_norm": 0.9206239653453478, + "learning_rate": 1.9954341880181536e-05, + "loss": 0.4582952857017517, + "step": 321 + }, + { + "epoch": 0.15914988261460522, + "grad_norm": 0.9460762127599929, + "learning_rate": 1.9953558671363843e-05, + "loss": 0.45110762119293213, + "step": 322 + }, + { + "epoch": 0.1596441369084394, + "grad_norm": 0.9441078381056233, + "learning_rate": 1.99527688176926e-05, + "loss": 0.4049065113067627, + "step": 323 + }, + { + "epoch": 0.16013839120227358, + "grad_norm": 0.8033040053333058, + "learning_rate": 1.9951972319695105e-05, + "loss": 0.40884825587272644, + "step": 324 + }, + { + "epoch": 0.16063264549610776, + "grad_norm": 0.902465277703788, + "learning_rate": 1.9951169177903084e-05, + "loss": 0.4416786730289459, + "step": 325 + }, + { + "epoch": 0.16112689978994194, + "grad_norm": 0.8396124025463547, + "learning_rate": 1.9950359392852704e-05, + "loss": 0.4318765103816986, + "step": 326 + }, + { + "epoch": 0.16162115408377611, + "grad_norm": 0.9197188335811614, + "learning_rate": 1.9949542965084564e-05, + "loss": 0.4415965974330902, + "step": 327 + }, + { + "epoch": 0.1621154083776103, + "grad_norm": 0.9816748337776936, + "learning_rate": 1.9948719895143703e-05, + "loss": 0.4816298186779022, + "step": 328 + }, + { + "epoch": 0.16260966267144447, + "grad_norm": 0.8960734361029558, + "learning_rate": 1.9947890183579594e-05, + "loss": 0.4329088032245636, + "step": 329 + }, + { + "epoch": 0.16310391696527862, + "grad_norm": 0.9960918612087606, + "learning_rate": 1.9947053830946134e-05, + "loss": 0.43193015456199646, + "step": 330 + }, + { + "epoch": 0.1635981712591128, + "grad_norm": 0.9310501291263382, + "learning_rate": 1.994621083780166e-05, + "loss": 0.48738086223602295, + "step": 331 + }, + { + "epoch": 0.16409242555294698, + "grad_norm": 0.9523291617618251, + "learning_rate": 1.9945361204708948e-05, + "loss": 0.4707815647125244, + "step": 332 + }, + { + "epoch": 0.16458667984678116, + "grad_norm": 
0.8438149141988297, + "learning_rate": 1.9944504932235198e-05, + "loss": 0.4190637469291687, + "step": 333 + }, + { + "epoch": 0.16508093414061534, + "grad_norm": 0.9348901251563362, + "learning_rate": 1.9943642020952042e-05, + "loss": 0.45955735445022583, + "step": 334 + }, + { + "epoch": 0.16557518843444952, + "grad_norm": 0.9334033255095994, + "learning_rate": 1.9942772471435555e-05, + "loss": 0.4675702750682831, + "step": 335 + }, + { + "epoch": 0.1660694427282837, + "grad_norm": 0.9694338385909206, + "learning_rate": 1.9941896284266224e-05, + "loss": 0.42571327090263367, + "step": 336 + }, + { + "epoch": 0.16656369702211787, + "grad_norm": 0.827954024094364, + "learning_rate": 1.994101346002899e-05, + "loss": 0.4341443181037903, + "step": 337 + }, + { + "epoch": 0.16705795131595205, + "grad_norm": 0.9227161087353433, + "learning_rate": 1.9940123999313214e-05, + "loss": 0.4473035931587219, + "step": 338 + }, + { + "epoch": 0.16755220560978623, + "grad_norm": 0.9514215023205275, + "learning_rate": 1.9939227902712676e-05, + "loss": 0.4692152142524719, + "step": 339 + }, + { + "epoch": 0.1680464599036204, + "grad_norm": 0.902462533797338, + "learning_rate": 1.9938325170825607e-05, + "loss": 0.4169067442417145, + "step": 340 + }, + { + "epoch": 0.1685407141974546, + "grad_norm": 0.8958693793994358, + "learning_rate": 1.9937415804254657e-05, + "loss": 0.451092928647995, + "step": 341 + }, + { + "epoch": 0.16903496849128877, + "grad_norm": 0.9439820250269497, + "learning_rate": 1.99364998036069e-05, + "loss": 0.39640212059020996, + "step": 342 + }, + { + "epoch": 0.16952922278512295, + "grad_norm": 0.9953253959869931, + "learning_rate": 1.9935577169493854e-05, + "loss": 0.46396374702453613, + "step": 343 + }, + { + "epoch": 0.17002347707895712, + "grad_norm": 0.940542166338043, + "learning_rate": 1.9934647902531453e-05, + "loss": 0.4343748390674591, + "step": 344 + }, + { + "epoch": 0.1705177313727913, + "grad_norm": 0.8926095624124082, + "learning_rate": 
1.9933712003340056e-05, + "loss": 0.4353589713573456, + "step": 345 + }, + { + "epoch": 0.17101198566662548, + "grad_norm": 0.981244679678695, + "learning_rate": 1.9932769472544464e-05, + "loss": 0.4423677623271942, + "step": 346 + }, + { + "epoch": 0.17150623996045966, + "grad_norm": 0.9632090771111401, + "learning_rate": 1.9931820310773894e-05, + "loss": 0.4382045865058899, + "step": 347 + }, + { + "epoch": 0.17200049425429384, + "grad_norm": 0.9042153187184925, + "learning_rate": 1.993086451866199e-05, + "loss": 0.3966183066368103, + "step": 348 + }, + { + "epoch": 0.17249474854812802, + "grad_norm": 0.9998736444681166, + "learning_rate": 1.9929902096846833e-05, + "loss": 0.48624011874198914, + "step": 349 + }, + { + "epoch": 0.1729890028419622, + "grad_norm": 0.9399569652966117, + "learning_rate": 1.9928933045970913e-05, + "loss": 0.4442569315433502, + "step": 350 + }, + { + "epoch": 0.17348325713579638, + "grad_norm": 0.9204808269523502, + "learning_rate": 1.992795736668116e-05, + "loss": 0.42499929666519165, + "step": 351 + }, + { + "epoch": 0.17397751142963055, + "grad_norm": 0.9507435140290256, + "learning_rate": 1.9926975059628923e-05, + "loss": 0.4230741858482361, + "step": 352 + }, + { + "epoch": 0.17447176572346473, + "grad_norm": 0.9092303670359448, + "learning_rate": 1.9925986125469974e-05, + "loss": 0.4273882806301117, + "step": 353 + }, + { + "epoch": 0.1749660200172989, + "grad_norm": 0.9603670891238569, + "learning_rate": 1.9924990564864513e-05, + "loss": 0.45237618684768677, + "step": 354 + }, + { + "epoch": 0.1754602743111331, + "grad_norm": 0.8737901526941092, + "learning_rate": 1.9923988378477165e-05, + "loss": 0.4115524888038635, + "step": 355 + }, + { + "epoch": 0.17595452860496724, + "grad_norm": 0.8886450314145863, + "learning_rate": 1.9922979566976968e-05, + "loss": 0.4476633071899414, + "step": 356 + }, + { + "epoch": 0.17644878289880142, + "grad_norm": 1.155944411883778, + "learning_rate": 1.9921964131037398e-05, + "loss": 
0.44930100440979004, + "step": 357 + }, + { + "epoch": 0.1769430371926356, + "grad_norm": 1.0356351975379994, + "learning_rate": 1.9920942071336338e-05, + "loss": 0.4714374244213104, + "step": 358 + }, + { + "epoch": 0.17743729148646978, + "grad_norm": 0.9469405731486913, + "learning_rate": 1.9919913388556105e-05, + "loss": 0.47696003317832947, + "step": 359 + }, + { + "epoch": 0.17793154578030396, + "grad_norm": 0.9021123492009391, + "learning_rate": 1.9918878083383434e-05, + "loss": 0.44937074184417725, + "step": 360 + }, + { + "epoch": 0.17842580007413814, + "grad_norm": 0.9771832594876818, + "learning_rate": 1.9917836156509472e-05, + "loss": 0.44937658309936523, + "step": 361 + }, + { + "epoch": 0.17892005436797231, + "grad_norm": 0.8240548100976023, + "learning_rate": 1.9916787608629805e-05, + "loss": 0.42068418860435486, + "step": 362 + }, + { + "epoch": 0.1794143086618065, + "grad_norm": 0.9112160927316303, + "learning_rate": 1.9915732440444428e-05, + "loss": 0.3791036605834961, + "step": 363 + }, + { + "epoch": 0.17990856295564067, + "grad_norm": 0.8982890263422821, + "learning_rate": 1.991467065265775e-05, + "loss": 0.401694118976593, + "step": 364 + }, + { + "epoch": 0.18040281724947485, + "grad_norm": 0.9743587318559909, + "learning_rate": 1.9913602245978602e-05, + "loss": 0.44095057249069214, + "step": 365 + }, + { + "epoch": 0.18089707154330903, + "grad_norm": 1.0125028049881057, + "learning_rate": 1.9912527221120248e-05, + "loss": 0.435880184173584, + "step": 366 + }, + { + "epoch": 0.1813913258371432, + "grad_norm": 0.9329716691545672, + "learning_rate": 1.991144557880035e-05, + "loss": 0.4147350490093231, + "step": 367 + }, + { + "epoch": 0.1818855801309774, + "grad_norm": 1.0077861725089856, + "learning_rate": 1.9910357319741006e-05, + "loss": 0.4191502630710602, + "step": 368 + }, + { + "epoch": 0.18237983442481157, + "grad_norm": 0.9334667001994715, + "learning_rate": 1.9909262444668715e-05, + "loss": 0.41988956928253174, + "step": 369 + }, + { + 
"epoch": 0.18287408871864574, + "grad_norm": 1.0279430559635638, + "learning_rate": 1.99081609543144e-05, + "loss": 0.47451251745224, + "step": 370 + }, + { + "epoch": 0.18336834301247992, + "grad_norm": 0.9591522165165333, + "learning_rate": 1.9907052849413408e-05, + "loss": 0.44665899872779846, + "step": 371 + }, + { + "epoch": 0.1838625973063141, + "grad_norm": 1.0147189696208934, + "learning_rate": 1.990593813070548e-05, + "loss": 0.40575331449508667, + "step": 372 + }, + { + "epoch": 0.18435685160014828, + "grad_norm": 0.869456919545876, + "learning_rate": 1.99048167989348e-05, + "loss": 0.40580621361732483, + "step": 373 + }, + { + "epoch": 0.18485110589398246, + "grad_norm": 0.9514367145479501, + "learning_rate": 1.9903688854849948e-05, + "loss": 0.461843878030777, + "step": 374 + }, + { + "epoch": 0.18534536018781664, + "grad_norm": 0.9237949473924573, + "learning_rate": 1.990255429920392e-05, + "loss": 0.38992881774902344, + "step": 375 + }, + { + "epoch": 0.18583961448165082, + "grad_norm": 0.8831901142276523, + "learning_rate": 1.9901413132754133e-05, + "loss": 0.4288073480129242, + "step": 376 + }, + { + "epoch": 0.186333868775485, + "grad_norm": 0.9233387492673684, + "learning_rate": 1.9900265356262418e-05, + "loss": 0.4376278221607208, + "step": 377 + }, + { + "epoch": 0.18682812306931917, + "grad_norm": 1.0362403856880367, + "learning_rate": 1.9899110970495e-05, + "loss": 0.4127569794654846, + "step": 378 + }, + { + "epoch": 0.18732237736315335, + "grad_norm": 0.9507974239376735, + "learning_rate": 1.9897949976222543e-05, + "loss": 0.4221431016921997, + "step": 379 + }, + { + "epoch": 0.18781663165698753, + "grad_norm": 0.9433678538632697, + "learning_rate": 1.9896782374220108e-05, + "loss": 0.3540682792663574, + "step": 380 + }, + { + "epoch": 0.1883108859508217, + "grad_norm": 0.9261378158924178, + "learning_rate": 1.9895608165267165e-05, + "loss": 0.3746468424797058, + "step": 381 + }, + { + "epoch": 0.1888051402446559, + "grad_norm": 
0.885989840984364, + "learning_rate": 1.9894427350147602e-05, + "loss": 0.44986462593078613, + "step": 382 + }, + { + "epoch": 0.18929939453849004, + "grad_norm": 0.990953109983041, + "learning_rate": 1.9893239929649716e-05, + "loss": 0.38902726769447327, + "step": 383 + }, + { + "epoch": 0.18979364883232422, + "grad_norm": 0.9780134618767543, + "learning_rate": 1.9892045904566212e-05, + "loss": 0.43202030658721924, + "step": 384 + }, + { + "epoch": 0.1902879031261584, + "grad_norm": 0.9892650612917288, + "learning_rate": 1.9890845275694197e-05, + "loss": 0.3984760344028473, + "step": 385 + }, + { + "epoch": 0.19078215741999258, + "grad_norm": 0.9818585745680383, + "learning_rate": 1.9889638043835203e-05, + "loss": 0.41927874088287354, + "step": 386 + }, + { + "epoch": 0.19127641171382676, + "grad_norm": 0.8767703705433573, + "learning_rate": 1.9888424209795153e-05, + "loss": 0.3809741735458374, + "step": 387 + }, + { + "epoch": 0.19177066600766093, + "grad_norm": 0.9482820311569345, + "learning_rate": 1.988720377438439e-05, + "loss": 0.4237920045852661, + "step": 388 + }, + { + "epoch": 0.1922649203014951, + "grad_norm": 1.0327070863618417, + "learning_rate": 1.9885976738417662e-05, + "loss": 0.4065277576446533, + "step": 389 + }, + { + "epoch": 0.1927591745953293, + "grad_norm": 0.9237977569787911, + "learning_rate": 1.9884743102714116e-05, + "loss": 0.41154375672340393, + "step": 390 + }, + { + "epoch": 0.19325342888916347, + "grad_norm": 1.2326124039761357, + "learning_rate": 1.9883502868097304e-05, + "loss": 0.46544453501701355, + "step": 391 + }, + { + "epoch": 0.19374768318299765, + "grad_norm": 0.9587510645484782, + "learning_rate": 1.9882256035395204e-05, + "loss": 0.41279950737953186, + "step": 392 + }, + { + "epoch": 0.19424193747683183, + "grad_norm": 0.861022204519604, + "learning_rate": 1.988100260544017e-05, + "loss": 0.40083667635917664, + "step": 393 + }, + { + "epoch": 0.194736191770666, + "grad_norm": 0.8790820180214292, + "learning_rate": 
1.9879742579068976e-05, + "loss": 0.40041595697402954, + "step": 394 + }, + { + "epoch": 0.19523044606450018, + "grad_norm": 1.0258873082657662, + "learning_rate": 1.9878475957122803e-05, + "loss": 0.45317894220352173, + "step": 395 + }, + { + "epoch": 0.19572470035833436, + "grad_norm": 0.9348755525455025, + "learning_rate": 1.987720274044723e-05, + "loss": 0.4163329005241394, + "step": 396 + }, + { + "epoch": 0.19621895465216854, + "grad_norm": 0.9706842353465618, + "learning_rate": 1.9875922929892235e-05, + "loss": 0.4252028167247772, + "step": 397 + }, + { + "epoch": 0.19671320894600272, + "grad_norm": 0.9127590943033566, + "learning_rate": 1.9874636526312202e-05, + "loss": 0.40558624267578125, + "step": 398 + }, + { + "epoch": 0.1972074632398369, + "grad_norm": 0.9762994418484081, + "learning_rate": 1.9873343530565913e-05, + "loss": 0.4352114796638489, + "step": 399 + }, + { + "epoch": 0.19770171753367108, + "grad_norm": 0.9123271316620398, + "learning_rate": 1.9872043943516556e-05, + "loss": 0.4076879024505615, + "step": 400 + }, + { + "epoch": 0.19819597182750526, + "grad_norm": 0.9627661884342358, + "learning_rate": 1.987073776603172e-05, + "loss": 0.4406166672706604, + "step": 401 + }, + { + "epoch": 0.19869022612133944, + "grad_norm": 0.8833048421451372, + "learning_rate": 1.9869424998983386e-05, + "loss": 0.3974360227584839, + "step": 402 + }, + { + "epoch": 0.19918448041517361, + "grad_norm": 0.8808806866223299, + "learning_rate": 1.9868105643247934e-05, + "loss": 0.4297831058502197, + "step": 403 + }, + { + "epoch": 0.1996787347090078, + "grad_norm": 0.9793340004481055, + "learning_rate": 1.986677969970616e-05, + "loss": 0.4214811623096466, + "step": 404 + }, + { + "epoch": 0.20017298900284197, + "grad_norm": 0.8979387674277745, + "learning_rate": 1.9865447169243234e-05, + "loss": 0.37227538228034973, + "step": 405 + }, + { + "epoch": 0.20066724329667615, + "grad_norm": 0.9492862396661451, + "learning_rate": 1.986410805274874e-05, + "loss": 
0.4367320239543915, + "step": 406 + }, + { + "epoch": 0.20116149759051033, + "grad_norm": 0.9753990450504955, + "learning_rate": 1.9862762351116646e-05, + "loss": 0.4327583909034729, + "step": 407 + }, + { + "epoch": 0.2016557518843445, + "grad_norm": 0.9742332984468446, + "learning_rate": 1.9861410065245332e-05, + "loss": 0.45309939980506897, + "step": 408 + }, + { + "epoch": 0.20215000617817866, + "grad_norm": 0.9433373475369933, + "learning_rate": 1.986005119603756e-05, + "loss": 0.39196106791496277, + "step": 409 + }, + { + "epoch": 0.20264426047201284, + "grad_norm": 0.9834536288459345, + "learning_rate": 1.985868574440049e-05, + "loss": 0.4037923812866211, + "step": 410 + }, + { + "epoch": 0.20313851476584702, + "grad_norm": 0.9331733674072598, + "learning_rate": 1.9857313711245684e-05, + "loss": 0.41214677691459656, + "step": 411 + }, + { + "epoch": 0.2036327690596812, + "grad_norm": 0.9676344806099859, + "learning_rate": 1.9855935097489087e-05, + "loss": 0.4265231192111969, + "step": 412 + }, + { + "epoch": 0.20412702335351537, + "grad_norm": 0.9398051984820485, + "learning_rate": 1.9854549904051046e-05, + "loss": 0.4245712161064148, + "step": 413 + }, + { + "epoch": 0.20462127764734955, + "grad_norm": 1.0688359248893853, + "learning_rate": 1.985315813185629e-05, + "loss": 0.36296984553337097, + "step": 414 + }, + { + "epoch": 0.20511553194118373, + "grad_norm": 0.8752111789079005, + "learning_rate": 1.985175978183395e-05, + "loss": 0.3982447683811188, + "step": 415 + }, + { + "epoch": 0.2056097862350179, + "grad_norm": 0.9696106773901182, + "learning_rate": 1.9850354854917543e-05, + "loss": 0.4087941646575928, + "step": 416 + }, + { + "epoch": 0.2061040405288521, + "grad_norm": 0.9068111697273192, + "learning_rate": 1.9848943352044982e-05, + "loss": 0.4147699177265167, + "step": 417 + }, + { + "epoch": 0.20659829482268627, + "grad_norm": 0.9679150237458849, + "learning_rate": 1.9847525274158562e-05, + "loss": 0.42588335275650024, + "step": 418 + }, + { + 
"epoch": 0.20709254911652045, + "grad_norm": 0.8455247598954041, + "learning_rate": 1.9846100622204975e-05, + "loss": 0.42607247829437256, + "step": 419 + }, + { + "epoch": 0.20758680341035463, + "grad_norm": 0.8383230576354441, + "learning_rate": 1.9844669397135292e-05, + "loss": 0.3600303530693054, + "step": 420 + }, + { + "epoch": 0.2080810577041888, + "grad_norm": 0.9989742736396935, + "learning_rate": 1.9843231599904988e-05, + "loss": 0.47888651490211487, + "step": 421 + }, + { + "epoch": 0.20857531199802298, + "grad_norm": 0.9050077435994102, + "learning_rate": 1.9841787231473906e-05, + "loss": 0.3789903521537781, + "step": 422 + }, + { + "epoch": 0.20906956629185716, + "grad_norm": 0.9737429395044322, + "learning_rate": 1.9840336292806292e-05, + "loss": 0.3682858943939209, + "step": 423 + }, + { + "epoch": 0.20956382058569134, + "grad_norm": 0.9565489819657318, + "learning_rate": 1.9838878784870772e-05, + "loss": 0.42071375250816345, + "step": 424 + }, + { + "epoch": 0.21005807487952552, + "grad_norm": 0.8997646005118014, + "learning_rate": 1.9837414708640353e-05, + "loss": 0.4258945882320404, + "step": 425 + }, + { + "epoch": 0.2105523291733597, + "grad_norm": 0.8773247199262179, + "learning_rate": 1.9835944065092433e-05, + "loss": 0.42377644777297974, + "step": 426 + }, + { + "epoch": 0.21104658346719388, + "grad_norm": 0.8695535067011908, + "learning_rate": 1.9834466855208795e-05, + "loss": 0.35860198736190796, + "step": 427 + }, + { + "epoch": 0.21154083776102806, + "grad_norm": 0.8547283257189083, + "learning_rate": 1.9832983079975606e-05, + "loss": 0.3498537242412567, + "step": 428 + }, + { + "epoch": 0.21203509205486223, + "grad_norm": 0.9645117506541977, + "learning_rate": 1.9831492740383405e-05, + "loss": 0.3779754042625427, + "step": 429 + }, + { + "epoch": 0.2125293463486964, + "grad_norm": 0.9052431386511324, + "learning_rate": 1.9829995837427124e-05, + "loss": 0.3574570119380951, + "step": 430 + }, + { + "epoch": 0.2130236006425306, + 
"grad_norm": 0.9528105437455127, + "learning_rate": 1.982849237210608e-05, + "loss": 0.40678369998931885, + "step": 431 + }, + { + "epoch": 0.21351785493636477, + "grad_norm": 1.0383565017869998, + "learning_rate": 1.9826982345423955e-05, + "loss": 0.4392494261264801, + "step": 432 + }, + { + "epoch": 0.21401210923019895, + "grad_norm": 0.9595788699726988, + "learning_rate": 1.982546575838883e-05, + "loss": 0.3858703374862671, + "step": 433 + }, + { + "epoch": 0.21450636352403313, + "grad_norm": 1.022569300933342, + "learning_rate": 1.9823942612013153e-05, + "loss": 0.4427873492240906, + "step": 434 + }, + { + "epoch": 0.21500061781786728, + "grad_norm": 1.0243841009335557, + "learning_rate": 1.9822412907313756e-05, + "loss": 0.40610629320144653, + "step": 435 + }, + { + "epoch": 0.21549487211170146, + "grad_norm": 1.0647698522638835, + "learning_rate": 1.9820876645311847e-05, + "loss": 0.4181024432182312, + "step": 436 + }, + { + "epoch": 0.21598912640553564, + "grad_norm": 0.9101041422869367, + "learning_rate": 1.981933382703301e-05, + "loss": 0.39591747522354126, + "step": 437 + }, + { + "epoch": 0.21648338069936982, + "grad_norm": 1.0250837449595331, + "learning_rate": 1.9817784453507215e-05, + "loss": 0.4326947033405304, + "step": 438 + }, + { + "epoch": 0.216977634993204, + "grad_norm": 1.0886150838818542, + "learning_rate": 1.98162285257688e-05, + "loss": 0.42645522952079773, + "step": 439 + }, + { + "epoch": 0.21747188928703817, + "grad_norm": 0.978930417047399, + "learning_rate": 1.9814666044856472e-05, + "loss": 0.37372538447380066, + "step": 440 + }, + { + "epoch": 0.21796614358087235, + "grad_norm": 1.0917263900138416, + "learning_rate": 1.9813097011813328e-05, + "loss": 0.44066423177719116, + "step": 441 + }, + { + "epoch": 0.21846039787470653, + "grad_norm": 0.9730835844652884, + "learning_rate": 1.9811521427686833e-05, + "loss": 0.39892369508743286, + "step": 442 + }, + { + "epoch": 0.2189546521685407, + "grad_norm": 1.003964491264553, + 
"learning_rate": 1.980993929352882e-05, + "loss": 0.43497514724731445, + "step": 443 + }, + { + "epoch": 0.2194489064623749, + "grad_norm": 0.9716014988350979, + "learning_rate": 1.9808350610395504e-05, + "loss": 0.3810148239135742, + "step": 444 + }, + { + "epoch": 0.21994316075620907, + "grad_norm": 1.0156931642150575, + "learning_rate": 1.9806755379347465e-05, + "loss": 0.3952462673187256, + "step": 445 + }, + { + "epoch": 0.22043741505004324, + "grad_norm": 0.8774607433571091, + "learning_rate": 1.9805153601449655e-05, + "loss": 0.39168232679367065, + "step": 446 + }, + { + "epoch": 0.22093166934387742, + "grad_norm": 0.8991272209071992, + "learning_rate": 1.98035452777714e-05, + "loss": 0.38572901487350464, + "step": 447 + }, + { + "epoch": 0.2214259236377116, + "grad_norm": 0.9468757778036829, + "learning_rate": 1.980193040938639e-05, + "loss": 0.40514758229255676, + "step": 448 + }, + { + "epoch": 0.22192017793154578, + "grad_norm": 0.9858758484436677, + "learning_rate": 1.9800308997372696e-05, + "loss": 0.4289678931236267, + "step": 449 + }, + { + "epoch": 0.22241443222537996, + "grad_norm": 1.074259689420517, + "learning_rate": 1.979868104281274e-05, + "loss": 0.4082314670085907, + "step": 450 + }, + { + "epoch": 0.22290868651921414, + "grad_norm": 0.8691392363656588, + "learning_rate": 1.979704654679333e-05, + "loss": 0.3819827735424042, + "step": 451 + }, + { + "epoch": 0.22340294081304832, + "grad_norm": 0.9538480526249539, + "learning_rate": 1.979540551040563e-05, + "loss": 0.42063748836517334, + "step": 452 + }, + { + "epoch": 0.2238971951068825, + "grad_norm": 0.9510560747426838, + "learning_rate": 1.9793757934745166e-05, + "loss": 0.41634586453437805, + "step": 453 + }, + { + "epoch": 0.22439144940071667, + "grad_norm": 0.9597511417746731, + "learning_rate": 1.979210382091184e-05, + "loss": 0.4151400625705719, + "step": 454 + }, + { + "epoch": 0.22488570369455085, + "grad_norm": 0.9461794779595009, + "learning_rate": 1.9790443170009918e-05, + 
"loss": 0.40609729290008545, + "step": 455 + }, + { + "epoch": 0.22537995798838503, + "grad_norm": 0.9000627758052128, + "learning_rate": 1.9788775983148022e-05, + "loss": 0.38967129588127136, + "step": 456 + }, + { + "epoch": 0.2258742122822192, + "grad_norm": 0.9437292574418441, + "learning_rate": 1.978710226143915e-05, + "loss": 0.3833470940589905, + "step": 457 + }, + { + "epoch": 0.2263684665760534, + "grad_norm": 1.0849111028533656, + "learning_rate": 1.978542200600064e-05, + "loss": 0.42918887734413147, + "step": 458 + }, + { + "epoch": 0.22686272086988757, + "grad_norm": 0.8891911900981012, + "learning_rate": 1.978373521795422e-05, + "loss": 0.3793666660785675, + "step": 459 + }, + { + "epoch": 0.22735697516372175, + "grad_norm": 0.9329571379921634, + "learning_rate": 1.978204189842596e-05, + "loss": 0.3885256350040436, + "step": 460 + }, + { + "epoch": 0.22785122945755593, + "grad_norm": 0.9612859575938862, + "learning_rate": 1.97803420485463e-05, + "loss": 0.4003330171108246, + "step": 461 + }, + { + "epoch": 0.22834548375139008, + "grad_norm": 1.0153934251086247, + "learning_rate": 1.9778635669450026e-05, + "loss": 0.4050712585449219, + "step": 462 + }, + { + "epoch": 0.22883973804522426, + "grad_norm": 0.9955917551783842, + "learning_rate": 1.9776922762276304e-05, + "loss": 0.4003967046737671, + "step": 463 + }, + { + "epoch": 0.22933399233905843, + "grad_norm": 1.0625378898456048, + "learning_rate": 1.9775203328168643e-05, + "loss": 0.4506968855857849, + "step": 464 + }, + { + "epoch": 0.2298282466328926, + "grad_norm": 0.9586656507624374, + "learning_rate": 1.9773477368274906e-05, + "loss": 0.3947281241416931, + "step": 465 + }, + { + "epoch": 0.2303225009267268, + "grad_norm": 1.0193199601021392, + "learning_rate": 1.9771744883747326e-05, + "loss": 0.4166758954524994, + "step": 466 + }, + { + "epoch": 0.23081675522056097, + "grad_norm": 0.9824293606770813, + "learning_rate": 1.9770005875742484e-05, + "loss": 0.40400344133377075, + "step": 467 + }, + 
{ + "epoch": 0.23131100951439515, + "grad_norm": 0.9404029827561814, + "learning_rate": 1.9768260345421312e-05, + "loss": 0.4143296480178833, + "step": 468 + }, + { + "epoch": 0.23180526380822933, + "grad_norm": 1.0496759638208417, + "learning_rate": 1.976650829394911e-05, + "loss": 0.39128193259239197, + "step": 469 + }, + { + "epoch": 0.2322995181020635, + "grad_norm": 1.033325283396431, + "learning_rate": 1.9764749722495514e-05, + "loss": 0.4305758476257324, + "step": 470 + }, + { + "epoch": 0.23279377239589769, + "grad_norm": 0.9791981730439014, + "learning_rate": 1.9762984632234523e-05, + "loss": 0.41711747646331787, + "step": 471 + }, + { + "epoch": 0.23328802668973186, + "grad_norm": 0.9590482451910926, + "learning_rate": 1.976121302434449e-05, + "loss": 0.43328845500946045, + "step": 472 + }, + { + "epoch": 0.23378228098356604, + "grad_norm": 0.9134750069589276, + "learning_rate": 1.975943490000811e-05, + "loss": 0.38707420229911804, + "step": 473 + }, + { + "epoch": 0.23427653527740022, + "grad_norm": 0.9896782154106246, + "learning_rate": 1.9757650260412438e-05, + "loss": 0.390054851770401, + "step": 474 + }, + { + "epoch": 0.2347707895712344, + "grad_norm": 1.0430972668852745, + "learning_rate": 1.9755859106748875e-05, + "loss": 0.45697346329689026, + "step": 475 + }, + { + "epoch": 0.23526504386506858, + "grad_norm": 0.950214634248398, + "learning_rate": 1.9754061440213165e-05, + "loss": 0.4381307363510132, + "step": 476 + }, + { + "epoch": 0.23575929815890276, + "grad_norm": 0.9612066818802636, + "learning_rate": 1.9752257262005403e-05, + "loss": 0.4217841625213623, + "step": 477 + }, + { + "epoch": 0.23625355245273694, + "grad_norm": 0.8699003234814695, + "learning_rate": 1.9750446573330038e-05, + "loss": 0.35968005657196045, + "step": 478 + }, + { + "epoch": 0.23674780674657112, + "grad_norm": 0.8353290173002438, + "learning_rate": 1.9748629375395856e-05, + "loss": 0.3516439199447632, + "step": 479 + }, + { + "epoch": 0.2372420610404053, + 
"grad_norm": 0.9683111499165196, + "learning_rate": 1.9746805669415995e-05, + "loss": 0.4078671634197235, + "step": 480 + }, + { + "epoch": 0.23773631533423947, + "grad_norm": 0.967434671965903, + "learning_rate": 1.9744975456607936e-05, + "loss": 0.39654213190078735, + "step": 481 + }, + { + "epoch": 0.23823056962807365, + "grad_norm": 0.9446129798331165, + "learning_rate": 1.9743138738193498e-05, + "loss": 0.41271698474884033, + "step": 482 + }, + { + "epoch": 0.23872482392190783, + "grad_norm": 0.9563785743614732, + "learning_rate": 1.974129551539885e-05, + "loss": 0.3957251310348511, + "step": 483 + }, + { + "epoch": 0.239219078215742, + "grad_norm": 1.0318067283466978, + "learning_rate": 1.9739445789454506e-05, + "loss": 0.39857393503189087, + "step": 484 + }, + { + "epoch": 0.2397133325095762, + "grad_norm": 0.9625937520590958, + "learning_rate": 1.973758956159531e-05, + "loss": 0.4263526499271393, + "step": 485 + }, + { + "epoch": 0.24020758680341037, + "grad_norm": 0.9782583924092142, + "learning_rate": 1.9735726833060457e-05, + "loss": 0.3849489688873291, + "step": 486 + }, + { + "epoch": 0.24070184109724455, + "grad_norm": 0.9932149128826128, + "learning_rate": 1.9733857605093476e-05, + "loss": 0.431019127368927, + "step": 487 + }, + { + "epoch": 0.2411960953910787, + "grad_norm": 0.9703866882534654, + "learning_rate": 1.973198187894224e-05, + "loss": 0.3740619421005249, + "step": 488 + }, + { + "epoch": 0.24169034968491288, + "grad_norm": 0.9420951155788563, + "learning_rate": 1.9730099655858953e-05, + "loss": 0.361680269241333, + "step": 489 + }, + { + "epoch": 0.24218460397874705, + "grad_norm": 1.0045147685747362, + "learning_rate": 1.9728210937100162e-05, + "loss": 0.41683071851730347, + "step": 490 + }, + { + "epoch": 0.24267885827258123, + "grad_norm": 1.0255058564946795, + "learning_rate": 1.9726315723926746e-05, + "loss": 0.3898739516735077, + "step": 491 + }, + { + "epoch": 0.2431731125664154, + "grad_norm": 0.992746780987763, + "learning_rate": 
1.9724414017603925e-05, + "loss": 0.39339032769203186, + "step": 492 + }, + { + "epoch": 0.2436673668602496, + "grad_norm": 0.9018262406248393, + "learning_rate": 1.9722505819401255e-05, + "loss": 0.401676744222641, + "step": 493 + }, + { + "epoch": 0.24416162115408377, + "grad_norm": 0.956392375337736, + "learning_rate": 1.9720591130592613e-05, + "loss": 0.3814789056777954, + "step": 494 + }, + { + "epoch": 0.24465587544791795, + "grad_norm": 1.0339059816881517, + "learning_rate": 1.9718669952456226e-05, + "loss": 0.3980346918106079, + "step": 495 + }, + { + "epoch": 0.24515012974175213, + "grad_norm": 1.0852693818985448, + "learning_rate": 1.971674228627464e-05, + "loss": 0.4222795069217682, + "step": 496 + }, + { + "epoch": 0.2456443840355863, + "grad_norm": 0.9629746856387489, + "learning_rate": 1.971480813333474e-05, + "loss": 0.3795197904109955, + "step": 497 + }, + { + "epoch": 0.24613863832942048, + "grad_norm": 1.0428831707745134, + "learning_rate": 1.971286749492774e-05, + "loss": 0.3746161460876465, + "step": 498 + }, + { + "epoch": 0.24663289262325466, + "grad_norm": 1.0211942338953277, + "learning_rate": 1.9710920372349174e-05, + "loss": 0.3552350699901581, + "step": 499 + }, + { + "epoch": 0.24712714691708884, + "grad_norm": 0.913724645727759, + "learning_rate": 1.9708966766898925e-05, + "loss": 0.39690741896629333, + "step": 500 + }, + { + "epoch": 0.24762140121092302, + "grad_norm": 1.0179277636972188, + "learning_rate": 1.9707006679881186e-05, + "loss": 0.39530014991760254, + "step": 501 + }, + { + "epoch": 0.2481156555047572, + "grad_norm": 1.0722850381631455, + "learning_rate": 1.9705040112604483e-05, + "loss": 0.41228705644607544, + "step": 502 + }, + { + "epoch": 0.24860990979859138, + "grad_norm": 0.9774177098582278, + "learning_rate": 1.9703067066381668e-05, + "loss": 0.4330476224422455, + "step": 503 + }, + { + "epoch": 0.24910416409242556, + "grad_norm": 0.9849824106564479, + "learning_rate": 1.970108754252992e-05, + "loss": 
0.38365668058395386, + "step": 504 + }, + { + "epoch": 0.24959841838625973, + "grad_norm": 1.0789440281177851, + "learning_rate": 1.969910154237074e-05, + "loss": 0.4419581890106201, + "step": 505 + }, + { + "epoch": 0.2500926726800939, + "grad_norm": 1.0828116066497757, + "learning_rate": 1.9697109067229957e-05, + "loss": 0.38741230964660645, + "step": 506 + }, + { + "epoch": 0.2505869269739281, + "grad_norm": 0.9914523280251673, + "learning_rate": 1.969511011843771e-05, + "loss": 0.41751983761787415, + "step": 507 + }, + { + "epoch": 0.25108118126776224, + "grad_norm": 0.9718169799013945, + "learning_rate": 1.9693104697328477e-05, + "loss": 0.40355241298675537, + "step": 508 + }, + { + "epoch": 0.25157543556159645, + "grad_norm": 1.003225231520968, + "learning_rate": 1.9691092805241046e-05, + "loss": 0.3511045575141907, + "step": 509 + }, + { + "epoch": 0.2520696898554306, + "grad_norm": 1.1208960250871327, + "learning_rate": 1.9689074443518526e-05, + "loss": 0.38917112350463867, + "step": 510 + }, + { + "epoch": 0.2525639441492648, + "grad_norm": 0.9640213098912707, + "learning_rate": 1.968704961350835e-05, + "loss": 0.40256473422050476, + "step": 511 + }, + { + "epoch": 0.25305819844309896, + "grad_norm": 0.8857886708710384, + "learning_rate": 1.968501831656226e-05, + "loss": 0.32350897789001465, + "step": 512 + }, + { + "epoch": 0.25355245273693316, + "grad_norm": 1.0209548318094466, + "learning_rate": 1.9682980554036322e-05, + "loss": 0.36787012219429016, + "step": 513 + }, + { + "epoch": 0.2540467070307673, + "grad_norm": 1.063374274844625, + "learning_rate": 1.9680936327290924e-05, + "loss": 0.4035605490207672, + "step": 514 + }, + { + "epoch": 0.2545409613246015, + "grad_norm": 0.9437423188361623, + "learning_rate": 1.9678885637690755e-05, + "loss": 0.39402660727500916, + "step": 515 + }, + { + "epoch": 0.2550352156184357, + "grad_norm": 1.1793476229973228, + "learning_rate": 1.967682848660483e-05, + "loss": 0.37553271651268005, + "step": 516 + }, + { + 
"epoch": 0.2555294699122699, + "grad_norm": 1.047789732428987, + "learning_rate": 1.9674764875406472e-05, + "loss": 0.40148675441741943, + "step": 517 + }, + { + "epoch": 0.25602372420610403, + "grad_norm": 1.1994265366678782, + "learning_rate": 1.967269480547332e-05, + "loss": 0.45255252718925476, + "step": 518 + }, + { + "epoch": 0.25651797849993824, + "grad_norm": 1.0116666478277523, + "learning_rate": 1.9670618278187318e-05, + "loss": 0.4183574616909027, + "step": 519 + }, + { + "epoch": 0.2570122327937724, + "grad_norm": 0.9518606397664687, + "learning_rate": 1.9668535294934733e-05, + "loss": 0.3950796127319336, + "step": 520 + }, + { + "epoch": 0.2575064870876066, + "grad_norm": 0.9729673190351172, + "learning_rate": 1.9666445857106132e-05, + "loss": 0.4062424898147583, + "step": 521 + }, + { + "epoch": 0.25800074138144075, + "grad_norm": 0.9474577180562711, + "learning_rate": 1.966434996609639e-05, + "loss": 0.4095906913280487, + "step": 522 + }, + { + "epoch": 0.25849499567527495, + "grad_norm": 1.1739974412660419, + "learning_rate": 1.96622476233047e-05, + "loss": 0.42302393913269043, + "step": 523 + }, + { + "epoch": 0.2589892499691091, + "grad_norm": 1.0746371790844444, + "learning_rate": 1.966013883013455e-05, + "loss": 0.43204039335250854, + "step": 524 + }, + { + "epoch": 0.2594835042629433, + "grad_norm": 0.9744852361980706, + "learning_rate": 1.9658023587993748e-05, + "loss": 0.39941906929016113, + "step": 525 + }, + { + "epoch": 0.25997775855677746, + "grad_norm": 0.9322675006976836, + "learning_rate": 1.9655901898294397e-05, + "loss": 0.37053728103637695, + "step": 526 + }, + { + "epoch": 0.26047201285061167, + "grad_norm": 0.9500036404091089, + "learning_rate": 1.96537737624529e-05, + "loss": 0.4126317501068115, + "step": 527 + }, + { + "epoch": 0.2609662671444458, + "grad_norm": 0.9592560956850021, + "learning_rate": 1.9651639181889975e-05, + "loss": 0.42397794127464294, + "step": 528 + }, + { + "epoch": 0.26146052143827997, + "grad_norm": 
1.09730750123291, + "learning_rate": 1.964949815803064e-05, + "loss": 0.3606872260570526, + "step": 529 + }, + { + "epoch": 0.2619547757321142, + "grad_norm": 1.0256203362936218, + "learning_rate": 1.9647350692304206e-05, + "loss": 0.420923113822937, + "step": 530 + }, + { + "epoch": 0.2624490300259483, + "grad_norm": 1.0242401280009386, + "learning_rate": 1.9645196786144298e-05, + "loss": 0.41700440645217896, + "step": 531 + }, + { + "epoch": 0.26294328431978253, + "grad_norm": 0.9861507549209962, + "learning_rate": 1.9643036440988825e-05, + "loss": 0.3961814045906067, + "step": 532 + }, + { + "epoch": 0.2634375386136167, + "grad_norm": 0.9400998714081333, + "learning_rate": 1.9640869658280005e-05, + "loss": 0.4025250971317291, + "step": 533 + }, + { + "epoch": 0.2639317929074509, + "grad_norm": 1.0201682019086518, + "learning_rate": 1.9638696439464357e-05, + "loss": 0.38828611373901367, + "step": 534 + }, + { + "epoch": 0.26442604720128504, + "grad_norm": 0.8944214314341241, + "learning_rate": 1.963651678599268e-05, + "loss": 0.3109109401702881, + "step": 535 + }, + { + "epoch": 0.26492030149511925, + "grad_norm": 1.0758326810562073, + "learning_rate": 1.963433069932009e-05, + "loss": 0.41516438126564026, + "step": 536 + }, + { + "epoch": 0.2654145557889534, + "grad_norm": 0.972035022615468, + "learning_rate": 1.9632138180905982e-05, + "loss": 0.3765295743942261, + "step": 537 + }, + { + "epoch": 0.2659088100827876, + "grad_norm": 1.0590611315407708, + "learning_rate": 1.9629939232214052e-05, + "loss": 0.37631309032440186, + "step": 538 + }, + { + "epoch": 0.26640306437662176, + "grad_norm": 0.9543257606304313, + "learning_rate": 1.9627733854712286e-05, + "loss": 0.3640018403530121, + "step": 539 + }, + { + "epoch": 0.26689731867045596, + "grad_norm": 1.0213174253270256, + "learning_rate": 1.9625522049872962e-05, + "loss": 0.3971521854400635, + "step": 540 + }, + { + "epoch": 0.2673915729642901, + "grad_norm": 1.0059131210770185, + "learning_rate": 
1.962330381917265e-05, + "loss": 0.4218612313270569, + "step": 541 + }, + { + "epoch": 0.2678858272581243, + "grad_norm": 1.0124871124462342, + "learning_rate": 1.9621079164092203e-05, + "loss": 0.38814622163772583, + "step": 542 + }, + { + "epoch": 0.26838008155195847, + "grad_norm": 1.0310689772428585, + "learning_rate": 1.961884808611678e-05, + "loss": 0.3912709355354309, + "step": 543 + }, + { + "epoch": 0.2688743358457927, + "grad_norm": 0.9919097213748044, + "learning_rate": 1.9616610586735808e-05, + "loss": 0.4007106423377991, + "step": 544 + }, + { + "epoch": 0.26936859013962683, + "grad_norm": 0.9871985402956727, + "learning_rate": 1.9614366667443016e-05, + "loss": 0.37406057119369507, + "step": 545 + }, + { + "epoch": 0.26986284443346104, + "grad_norm": 0.970768236440829, + "learning_rate": 1.961211632973641e-05, + "loss": 0.4187811613082886, + "step": 546 + }, + { + "epoch": 0.2703570987272952, + "grad_norm": 1.049304525520643, + "learning_rate": 1.960985957511828e-05, + "loss": 0.44418057799339294, + "step": 547 + }, + { + "epoch": 0.2708513530211294, + "grad_norm": 1.0048719478421346, + "learning_rate": 1.9607596405095205e-05, + "loss": 0.41016438603401184, + "step": 548 + }, + { + "epoch": 0.27134560731496354, + "grad_norm": 1.2563417457062223, + "learning_rate": 1.9605326821178047e-05, + "loss": 0.39461439847946167, + "step": 549 + }, + { + "epoch": 0.27183986160879775, + "grad_norm": 0.9443238609304102, + "learning_rate": 1.960305082488195e-05, + "loss": 0.4159786105155945, + "step": 550 + }, + { + "epoch": 0.2723341159026319, + "grad_norm": 0.9387957037755528, + "learning_rate": 1.960076841772633e-05, + "loss": 0.3702941834926605, + "step": 551 + }, + { + "epoch": 0.2728283701964661, + "grad_norm": 1.0745575617770338, + "learning_rate": 1.9598479601234894e-05, + "loss": 0.3482900559902191, + "step": 552 + }, + { + "epoch": 0.27332262449030026, + "grad_norm": 1.1412061517783256, + "learning_rate": 1.9596184376935618e-05, + "loss": 
0.40550655126571655, + "step": 553 + }, + { + "epoch": 0.2738168787841344, + "grad_norm": 0.9446073244587436, + "learning_rate": 1.9593882746360767e-05, + "loss": 0.38604867458343506, + "step": 554 + }, + { + "epoch": 0.2743111330779686, + "grad_norm": 0.9388567147005249, + "learning_rate": 1.9591574711046876e-05, + "loss": 0.36586758494377136, + "step": 555 + }, + { + "epoch": 0.27480538737180277, + "grad_norm": 0.9730414125092071, + "learning_rate": 1.958926027253475e-05, + "loss": 0.37780559062957764, + "step": 556 + }, + { + "epoch": 0.275299641665637, + "grad_norm": 0.9401659835761762, + "learning_rate": 1.9586939432369486e-05, + "loss": 0.3837544322013855, + "step": 557 + }, + { + "epoch": 0.2757938959594711, + "grad_norm": 1.038905164013387, + "learning_rate": 1.9584612192100433e-05, + "loss": 0.39425861835479736, + "step": 558 + }, + { + "epoch": 0.27628815025330533, + "grad_norm": 1.0791545750316935, + "learning_rate": 1.958227855328123e-05, + "loss": 0.4008832275867462, + "step": 559 + }, + { + "epoch": 0.2767824045471395, + "grad_norm": 1.0509839705522974, + "learning_rate": 1.957993851746978e-05, + "loss": 0.42411595582962036, + "step": 560 + }, + { + "epoch": 0.2772766588409737, + "grad_norm": 1.1626138880546706, + "learning_rate": 1.9577592086228257e-05, + "loss": 0.4028055965900421, + "step": 561 + }, + { + "epoch": 0.27777091313480784, + "grad_norm": 0.9383996498843509, + "learning_rate": 1.9575239261123102e-05, + "loss": 0.3785157799720764, + "step": 562 + }, + { + "epoch": 0.27826516742864205, + "grad_norm": 0.9289370196839293, + "learning_rate": 1.9572880043725032e-05, + "loss": 0.3726264536380768, + "step": 563 + }, + { + "epoch": 0.2787594217224762, + "grad_norm": 0.9959287145902769, + "learning_rate": 1.957051443560902e-05, + "loss": 0.37261486053466797, + "step": 564 + }, + { + "epoch": 0.2792536760163104, + "grad_norm": 0.9394373844868922, + "learning_rate": 1.956814243835432e-05, + "loss": 0.34781068563461304, + "step": 565 + }, + { + 
"epoch": 0.27974793031014455, + "grad_norm": 0.9899407389551799, + "learning_rate": 1.956576405354444e-05, + "loss": 0.3828197121620178, + "step": 566 + }, + { + "epoch": 0.28024218460397876, + "grad_norm": 0.9387592741594649, + "learning_rate": 1.9563379282767156e-05, + "loss": 0.3839726150035858, + "step": 567 + }, + { + "epoch": 0.2807364388978129, + "grad_norm": 1.053498529947078, + "learning_rate": 1.9560988127614507e-05, + "loss": 0.3658025562763214, + "step": 568 + }, + { + "epoch": 0.2812306931916471, + "grad_norm": 1.064206434015044, + "learning_rate": 1.9558590589682795e-05, + "loss": 0.400045782327652, + "step": 569 + }, + { + "epoch": 0.28172494748548127, + "grad_norm": 0.9470530474737298, + "learning_rate": 1.955618667057258e-05, + "loss": 0.36586880683898926, + "step": 570 + }, + { + "epoch": 0.2822192017793155, + "grad_norm": 1.0137760854012388, + "learning_rate": 1.9553776371888684e-05, + "loss": 0.3886389136314392, + "step": 571 + }, + { + "epoch": 0.2827134560731496, + "grad_norm": 1.0159520278130145, + "learning_rate": 1.955135969524019e-05, + "loss": 0.37858110666275024, + "step": 572 + }, + { + "epoch": 0.28320771036698383, + "grad_norm": 0.939134880585939, + "learning_rate": 1.9548936642240435e-05, + "loss": 0.3264877498149872, + "step": 573 + }, + { + "epoch": 0.283701964660818, + "grad_norm": 1.1465399296789363, + "learning_rate": 1.9546507214507017e-05, + "loss": 0.3756924569606781, + "step": 574 + }, + { + "epoch": 0.2841962189546522, + "grad_norm": 1.0922050133590595, + "learning_rate": 1.9544071413661783e-05, + "loss": 0.3773806691169739, + "step": 575 + }, + { + "epoch": 0.28469047324848634, + "grad_norm": 1.0432958526312845, + "learning_rate": 1.9541629241330842e-05, + "loss": 0.37437382340431213, + "step": 576 + }, + { + "epoch": 0.28518472754232055, + "grad_norm": 0.9730241652440514, + "learning_rate": 1.9539180699144552e-05, + "loss": 0.3835929036140442, + "step": 577 + }, + { + "epoch": 0.2856789818361547, + "grad_norm": 
1.2039096391780213, + "learning_rate": 1.9536725788737528e-05, + "loss": 0.39163681864738464, + "step": 578 + }, + { + "epoch": 0.2861732361299889, + "grad_norm": 1.1007303408462066, + "learning_rate": 1.953426451174863e-05, + "loss": 0.39241698384284973, + "step": 579 + }, + { + "epoch": 0.28666749042382306, + "grad_norm": 0.9748115984741068, + "learning_rate": 1.953179686982097e-05, + "loss": 0.32731348276138306, + "step": 580 + }, + { + "epoch": 0.2871617447176572, + "grad_norm": 0.9649406632940735, + "learning_rate": 1.9529322864601915e-05, + "loss": 0.34735041856765747, + "step": 581 + }, + { + "epoch": 0.2876559990114914, + "grad_norm": 1.0831552948058796, + "learning_rate": 1.952684249774307e-05, + "loss": 0.3795308470726013, + "step": 582 + }, + { + "epoch": 0.28815025330532557, + "grad_norm": 1.0599543241474398, + "learning_rate": 1.95243557709003e-05, + "loss": 0.3546086549758911, + "step": 583 + }, + { + "epoch": 0.28864450759915977, + "grad_norm": 0.9634030800835625, + "learning_rate": 1.9521862685733703e-05, + "loss": 0.35397839546203613, + "step": 584 + }, + { + "epoch": 0.2891387618929939, + "grad_norm": 0.972134968680729, + "learning_rate": 1.9519363243907627e-05, + "loss": 0.350521981716156, + "step": 585 + }, + { + "epoch": 0.28963301618682813, + "grad_norm": 1.0201322204570258, + "learning_rate": 1.9516857447090663e-05, + "loss": 0.380625456571579, + "step": 586 + }, + { + "epoch": 0.2901272704806623, + "grad_norm": 0.9847688200101109, + "learning_rate": 1.9514345296955647e-05, + "loss": 0.40378236770629883, + "step": 587 + }, + { + "epoch": 0.2906215247744965, + "grad_norm": 1.0122113576142937, + "learning_rate": 1.9511826795179653e-05, + "loss": 0.4050450325012207, + "step": 588 + }, + { + "epoch": 0.29111577906833064, + "grad_norm": 1.048628562831542, + "learning_rate": 1.9509301943444e-05, + "loss": 0.3772329092025757, + "step": 589 + }, + { + "epoch": 0.29161003336216484, + "grad_norm": 1.0803687765146506, + "learning_rate": 
1.9506770743434244e-05, + "loss": 0.4079870581626892, + "step": 590 + }, + { + "epoch": 0.292104287655999, + "grad_norm": 1.0069688403525805, + "learning_rate": 1.950423319684017e-05, + "loss": 0.4233503043651581, + "step": 591 + }, + { + "epoch": 0.2925985419498332, + "grad_norm": 1.0403594154189246, + "learning_rate": 1.9501689305355814e-05, + "loss": 0.395530104637146, + "step": 592 + }, + { + "epoch": 0.29309279624366735, + "grad_norm": 1.0468686113369423, + "learning_rate": 1.949913907067944e-05, + "loss": 0.4266175925731659, + "step": 593 + }, + { + "epoch": 0.29358705053750156, + "grad_norm": 1.0371386643985676, + "learning_rate": 1.949658249451355e-05, + "loss": 0.4428660571575165, + "step": 594 + }, + { + "epoch": 0.2940813048313357, + "grad_norm": 0.928511699803538, + "learning_rate": 1.9494019578564874e-05, + "loss": 0.36831945180892944, + "step": 595 + }, + { + "epoch": 0.2945755591251699, + "grad_norm": 1.059362576098806, + "learning_rate": 1.949145032454438e-05, + "loss": 0.392259806394577, + "step": 596 + }, + { + "epoch": 0.29506981341900407, + "grad_norm": 0.9638882642169329, + "learning_rate": 1.948887473416727e-05, + "loss": 0.43743032217025757, + "step": 597 + }, + { + "epoch": 0.2955640677128383, + "grad_norm": 0.9566828851720006, + "learning_rate": 1.9486292809152965e-05, + "loss": 0.3725258409976959, + "step": 598 + }, + { + "epoch": 0.2960583220066724, + "grad_norm": 0.9479087116485218, + "learning_rate": 1.948370455122512e-05, + "loss": 0.39507436752319336, + "step": 599 + }, + { + "epoch": 0.29655257630050663, + "grad_norm": 1.0509892705512045, + "learning_rate": 1.9481109962111623e-05, + "loss": 0.40915870666503906, + "step": 600 + }, + { + "epoch": 0.2970468305943408, + "grad_norm": 0.9528830591600533, + "learning_rate": 1.947850904354459e-05, + "loss": 0.3465006351470947, + "step": 601 + }, + { + "epoch": 0.297541084888175, + "grad_norm": 1.0989483899383072, + "learning_rate": 1.9475901797260346e-05, + "loss": 0.4205567538738251, + 
"step": 602 + }, + { + "epoch": 0.29803533918200914, + "grad_norm": 0.9986003576186586, + "learning_rate": 1.9473288224999455e-05, + "loss": 0.37682560086250305, + "step": 603 + }, + { + "epoch": 0.29852959347584335, + "grad_norm": 1.0393693996744362, + "learning_rate": 1.9470668328506705e-05, + "loss": 0.3865458369255066, + "step": 604 + }, + { + "epoch": 0.2990238477696775, + "grad_norm": 0.9426218637426483, + "learning_rate": 1.9468042109531096e-05, + "loss": 0.36366063356399536, + "step": 605 + }, + { + "epoch": 0.2995181020635117, + "grad_norm": 0.9801320950707162, + "learning_rate": 1.9465409569825857e-05, + "loss": 0.3861471116542816, + "step": 606 + }, + { + "epoch": 0.30001235635734586, + "grad_norm": 1.0257103381374684, + "learning_rate": 1.9462770711148433e-05, + "loss": 0.3499199151992798, + "step": 607 + }, + { + "epoch": 0.30050661065118, + "grad_norm": 1.1030346241860873, + "learning_rate": 1.946012553526049e-05, + "loss": 0.3704417943954468, + "step": 608 + }, + { + "epoch": 0.3010008649450142, + "grad_norm": 1.0751948386377395, + "learning_rate": 1.9457474043927908e-05, + "loss": 0.41278937458992004, + "step": 609 + }, + { + "epoch": 0.30149511923884836, + "grad_norm": 1.0379271128545955, + "learning_rate": 1.9454816238920787e-05, + "loss": 0.36078256368637085, + "step": 610 + }, + { + "epoch": 0.30198937353268257, + "grad_norm": 1.05890389444684, + "learning_rate": 1.9452152122013434e-05, + "loss": 0.3713051676750183, + "step": 611 + }, + { + "epoch": 0.3024836278265167, + "grad_norm": 1.0547983951495754, + "learning_rate": 1.9449481694984382e-05, + "loss": 0.3919684886932373, + "step": 612 + }, + { + "epoch": 0.3029778821203509, + "grad_norm": 1.1211767888578545, + "learning_rate": 1.9446804959616364e-05, + "loss": 0.4249044358730316, + "step": 613 + }, + { + "epoch": 0.3034721364141851, + "grad_norm": 1.0386798112962086, + "learning_rate": 1.9444121917696335e-05, + "loss": 0.4033172130584717, + "step": 614 + }, + { + "epoch": 0.3039663907080193, 
+ "grad_norm": 1.020453301484689, + "learning_rate": 1.9441432571015455e-05, + "loss": 0.35740789771080017, + "step": 615 + }, + { + "epoch": 0.30446064500185344, + "grad_norm": 1.0567402195641693, + "learning_rate": 1.9438736921369093e-05, + "loss": 0.41219189763069153, + "step": 616 + }, + { + "epoch": 0.30495489929568764, + "grad_norm": 1.0356137182677312, + "learning_rate": 1.9436034970556824e-05, + "loss": 0.3751283884048462, + "step": 617 + }, + { + "epoch": 0.3054491535895218, + "grad_norm": 1.0460808776118622, + "learning_rate": 1.9433326720382433e-05, + "loss": 0.40294593572616577, + "step": 618 + }, + { + "epoch": 0.305943407883356, + "grad_norm": 1.0087358245362568, + "learning_rate": 1.943061217265391e-05, + "loss": 0.4163772463798523, + "step": 619 + }, + { + "epoch": 0.30643766217719015, + "grad_norm": 1.007467123707354, + "learning_rate": 1.9427891329183444e-05, + "loss": 0.3796529769897461, + "step": 620 + }, + { + "epoch": 0.30693191647102436, + "grad_norm": 1.0905533067383615, + "learning_rate": 1.942516419178744e-05, + "loss": 0.44097092747688293, + "step": 621 + }, + { + "epoch": 0.3074261707648585, + "grad_norm": 0.9615172689674734, + "learning_rate": 1.942243076228649e-05, + "loss": 0.384232759475708, + "step": 622 + }, + { + "epoch": 0.3079204250586927, + "grad_norm": 0.9038435200954008, + "learning_rate": 1.941969104250539e-05, + "loss": 0.3734084367752075, + "step": 623 + }, + { + "epoch": 0.30841467935252687, + "grad_norm": 0.9414597847653995, + "learning_rate": 1.9416945034273142e-05, + "loss": 0.3532239496707916, + "step": 624 + }, + { + "epoch": 0.3089089336463611, + "grad_norm": 1.0668895366566058, + "learning_rate": 1.941419273942294e-05, + "loss": 0.39430537819862366, + "step": 625 + }, + { + "epoch": 0.3094031879401952, + "grad_norm": 1.0091341034087684, + "learning_rate": 1.941143415979218e-05, + "loss": 0.35790857672691345, + "step": 626 + }, + { + "epoch": 0.30989744223402943, + "grad_norm": 1.0381854826035726, + "learning_rate": 
1.9408669297222446e-05, + "loss": 0.3684060871601105, + "step": 627 + }, + { + "epoch": 0.3103916965278636, + "grad_norm": 0.9553898295016832, + "learning_rate": 1.9405898153559522e-05, + "loss": 0.3425355553627014, + "step": 628 + }, + { + "epoch": 0.3108859508216978, + "grad_norm": 0.9032294986887355, + "learning_rate": 1.9403120730653387e-05, + "loss": 0.3295109272003174, + "step": 629 + }, + { + "epoch": 0.31138020511553194, + "grad_norm": 1.0576168899253493, + "learning_rate": 1.940033703035821e-05, + "loss": 0.37015989422798157, + "step": 630 + }, + { + "epoch": 0.31187445940936614, + "grad_norm": 1.1361288169710941, + "learning_rate": 1.939754705453234e-05, + "loss": 0.40625980496406555, + "step": 631 + }, + { + "epoch": 0.3123687137032003, + "grad_norm": 1.3354529260238757, + "learning_rate": 1.939475080503833e-05, + "loss": 0.42503830790519714, + "step": 632 + }, + { + "epoch": 0.31286296799703445, + "grad_norm": 1.0863606838535078, + "learning_rate": 1.939194828374292e-05, + "loss": 0.36230289936065674, + "step": 633 + }, + { + "epoch": 0.31335722229086865, + "grad_norm": 0.9800314584790245, + "learning_rate": 1.938913949251703e-05, + "loss": 0.4128720164299011, + "step": 634 + }, + { + "epoch": 0.3138514765847028, + "grad_norm": 1.1018828002960295, + "learning_rate": 1.938632443323577e-05, + "loss": 0.39706575870513916, + "step": 635 + }, + { + "epoch": 0.314345730878537, + "grad_norm": 1.0451325322820368, + "learning_rate": 1.9383503107778434e-05, + "loss": 0.38395214080810547, + "step": 636 + }, + { + "epoch": 0.31483998517237116, + "grad_norm": 0.9669746428685202, + "learning_rate": 1.9380675518028495e-05, + "loss": 0.3629944324493408, + "step": 637 + }, + { + "epoch": 0.31533423946620537, + "grad_norm": 1.0589959103814197, + "learning_rate": 1.937784166587361e-05, + "loss": 0.39474761486053467, + "step": 638 + }, + { + "epoch": 0.3158284937600395, + "grad_norm": 1.085403264447479, + "learning_rate": 1.9375001553205627e-05, + "loss": 
0.423098087310791, + "step": 639 + }, + { + "epoch": 0.3163227480538737, + "grad_norm": 0.9239589256190138, + "learning_rate": 1.937215518192056e-05, + "loss": 0.3453904986381531, + "step": 640 + }, + { + "epoch": 0.3168170023477079, + "grad_norm": 0.9432054956835023, + "learning_rate": 1.9369302553918605e-05, + "loss": 0.3659127354621887, + "step": 641 + }, + { + "epoch": 0.3173112566415421, + "grad_norm": 1.060860081964917, + "learning_rate": 1.9366443671104132e-05, + "loss": 0.3613426089286804, + "step": 642 + }, + { + "epoch": 0.31780551093537623, + "grad_norm": 0.9515218135636598, + "learning_rate": 1.93635785353857e-05, + "loss": 0.3556531071662903, + "step": 643 + }, + { + "epoch": 0.31829976522921044, + "grad_norm": 0.9893630091198329, + "learning_rate": 1.9360707148676022e-05, + "loss": 0.3515596091747284, + "step": 644 + }, + { + "epoch": 0.3187940195230446, + "grad_norm": 0.9802147109168395, + "learning_rate": 1.9357829512892e-05, + "loss": 0.36270469427108765, + "step": 645 + }, + { + "epoch": 0.3192882738168788, + "grad_norm": 0.9936651325349853, + "learning_rate": 1.9354945629954706e-05, + "loss": 0.3617076277732849, + "step": 646 + }, + { + "epoch": 0.31978252811071295, + "grad_norm": 1.0835943099678094, + "learning_rate": 1.9352055501789376e-05, + "loss": 0.3888331949710846, + "step": 647 + }, + { + "epoch": 0.32027678240454716, + "grad_norm": 1.0454884563674065, + "learning_rate": 1.9349159130325413e-05, + "loss": 0.41199982166290283, + "step": 648 + }, + { + "epoch": 0.3207710366983813, + "grad_norm": 1.0758693507529822, + "learning_rate": 1.93462565174964e-05, + "loss": 0.3878370225429535, + "step": 649 + }, + { + "epoch": 0.3212652909922155, + "grad_norm": 1.0303850194409756, + "learning_rate": 1.9343347665240077e-05, + "loss": 0.380184531211853, + "step": 650 + }, + { + "epoch": 0.32175954528604966, + "grad_norm": 1.143999159363527, + "learning_rate": 1.9340432575498355e-05, + "loss": 0.3746795356273651, + "step": 651 + }, + { + "epoch": 
0.32225379957988387, + "grad_norm": 1.0188863097829193, + "learning_rate": 1.93375112502173e-05, + "loss": 0.3700905442237854, + "step": 652 + }, + { + "epoch": 0.322748053873718, + "grad_norm": 0.9032826115280742, + "learning_rate": 1.9334583691347153e-05, + "loss": 0.3331850469112396, + "step": 653 + }, + { + "epoch": 0.32324230816755223, + "grad_norm": 0.949854268007892, + "learning_rate": 1.933164990084231e-05, + "loss": 0.3397464156150818, + "step": 654 + }, + { + "epoch": 0.3237365624613864, + "grad_norm": 1.1199806793436613, + "learning_rate": 1.9328709880661326e-05, + "loss": 0.3837242126464844, + "step": 655 + }, + { + "epoch": 0.3242308167552206, + "grad_norm": 1.0801449332087112, + "learning_rate": 1.9325763632766916e-05, + "loss": 0.38854193687438965, + "step": 656 + }, + { + "epoch": 0.32472507104905474, + "grad_norm": 1.1330798719469783, + "learning_rate": 1.9322811159125955e-05, + "loss": 0.41792556643486023, + "step": 657 + }, + { + "epoch": 0.32521932534288894, + "grad_norm": 0.9831880252943476, + "learning_rate": 1.931985246170947e-05, + "loss": 0.3968243896961212, + "step": 658 + }, + { + "epoch": 0.3257135796367231, + "grad_norm": 1.0416971268065567, + "learning_rate": 1.9316887542492645e-05, + "loss": 0.41183531284332275, + "step": 659 + }, + { + "epoch": 0.32620783393055724, + "grad_norm": 1.0367106782684, + "learning_rate": 1.931391640345482e-05, + "loss": 0.36057350039482117, + "step": 660 + }, + { + "epoch": 0.32670208822439145, + "grad_norm": 1.0663955736026025, + "learning_rate": 1.9310939046579482e-05, + "loss": 0.36032363772392273, + "step": 661 + }, + { + "epoch": 0.3271963425182256, + "grad_norm": 0.9657326304523917, + "learning_rate": 1.9307955473854275e-05, + "loss": 0.3682931363582611, + "step": 662 + }, + { + "epoch": 0.3276905968120598, + "grad_norm": 1.004896861978755, + "learning_rate": 1.9304965687270987e-05, + "loss": 0.3829198181629181, + "step": 663 + }, + { + "epoch": 0.32818485110589396, + "grad_norm": 1.0180253035605964, 
+ "learning_rate": 1.930196968882556e-05, + "loss": 0.3901137709617615, + "step": 664 + }, + { + "epoch": 0.32867910539972817, + "grad_norm": 0.9037607838463562, + "learning_rate": 1.9298967480518077e-05, + "loss": 0.34352344274520874, + "step": 665 + }, + { + "epoch": 0.3291733596935623, + "grad_norm": 0.9918701152773953, + "learning_rate": 1.9295959064352767e-05, + "loss": 0.38822662830352783, + "step": 666 + }, + { + "epoch": 0.3296676139873965, + "grad_norm": 0.9619347095581623, + "learning_rate": 1.9292944442338013e-05, + "loss": 0.3639586567878723, + "step": 667 + }, + { + "epoch": 0.3301618682812307, + "grad_norm": 1.0248410702019595, + "learning_rate": 1.9289923616486326e-05, + "loss": 0.38537997007369995, + "step": 668 + }, + { + "epoch": 0.3306561225750649, + "grad_norm": 0.9469693142742907, + "learning_rate": 1.9286896588814373e-05, + "loss": 0.3514263331890106, + "step": 669 + }, + { + "epoch": 0.33115037686889903, + "grad_norm": 0.9776369401143131, + "learning_rate": 1.928386336134295e-05, + "loss": 0.3873803913593292, + "step": 670 + }, + { + "epoch": 0.33164463116273324, + "grad_norm": 1.0063829461952047, + "learning_rate": 1.9280823936096994e-05, + "loss": 0.36644282937049866, + "step": 671 + }, + { + "epoch": 0.3321388854565674, + "grad_norm": 0.8900960907324665, + "learning_rate": 1.9277778315105587e-05, + "loss": 0.34837427735328674, + "step": 672 + }, + { + "epoch": 0.3326331397504016, + "grad_norm": 1.0946494998655654, + "learning_rate": 1.927472650040194e-05, + "loss": 0.3879021406173706, + "step": 673 + }, + { + "epoch": 0.33312739404423575, + "grad_norm": 1.0256193203663788, + "learning_rate": 1.9271668494023404e-05, + "loss": 0.3753926753997803, + "step": 674 + }, + { + "epoch": 0.33362164833806995, + "grad_norm": 1.1193381317991955, + "learning_rate": 1.9268604298011454e-05, + "loss": 0.35362815856933594, + "step": 675 + }, + { + "epoch": 0.3341159026319041, + "grad_norm": 1.0612190451852097, + "learning_rate": 1.926553391441171e-05, + 
"loss": 0.3685564696788788, + "step": 676 + }, + { + "epoch": 0.3346101569257383, + "grad_norm": 1.2837359031878948, + "learning_rate": 1.926245734527391e-05, + "loss": 0.42326927185058594, + "step": 677 + }, + { + "epoch": 0.33510441121957246, + "grad_norm": 1.0247968871472715, + "learning_rate": 1.925937459265193e-05, + "loss": 0.35918861627578735, + "step": 678 + }, + { + "epoch": 0.33559866551340667, + "grad_norm": 1.1358099673309532, + "learning_rate": 1.9256285658603773e-05, + "loss": 0.38703471422195435, + "step": 679 + }, + { + "epoch": 0.3360929198072408, + "grad_norm": 1.0232813577835114, + "learning_rate": 1.9253190545191567e-05, + "loss": 0.3993009924888611, + "step": 680 + }, + { + "epoch": 0.336587174101075, + "grad_norm": 1.178587285681796, + "learning_rate": 1.9250089254481566e-05, + "loss": 0.3998498320579529, + "step": 681 + }, + { + "epoch": 0.3370814283949092, + "grad_norm": 1.0577657705862298, + "learning_rate": 1.9246981788544145e-05, + "loss": 0.37211501598358154, + "step": 682 + }, + { + "epoch": 0.3375756826887434, + "grad_norm": 1.0126592857393306, + "learning_rate": 1.9243868149453806e-05, + "loss": 0.37204745411872864, + "step": 683 + }, + { + "epoch": 0.33806993698257753, + "grad_norm": 0.9626025917248462, + "learning_rate": 1.924074833928917e-05, + "loss": 0.3784663677215576, + "step": 684 + }, + { + "epoch": 0.33856419127641174, + "grad_norm": 1.0085796667337208, + "learning_rate": 1.9237622360132975e-05, + "loss": 0.4140951633453369, + "step": 685 + }, + { + "epoch": 0.3390584455702459, + "grad_norm": 1.0251059918961796, + "learning_rate": 1.9234490214072083e-05, + "loss": 0.3723721504211426, + "step": 686 + }, + { + "epoch": 0.33955269986408004, + "grad_norm": 1.0704762953012439, + "learning_rate": 1.923135190319747e-05, + "loss": 0.3714251220226288, + "step": 687 + }, + { + "epoch": 0.34004695415791425, + "grad_norm": 2.286186750342226, + "learning_rate": 1.9228207429604224e-05, + "loss": 0.3551461696624756, + "step": 688 + }, + { 
+ "epoch": 0.3405412084517484, + "grad_norm": 1.0184392375158444, + "learning_rate": 1.9225056795391554e-05, + "loss": 0.3543378412723541, + "step": 689 + }, + { + "epoch": 0.3410354627455826, + "grad_norm": 0.9670805241747071, + "learning_rate": 1.922190000266278e-05, + "loss": 0.3405894935131073, + "step": 690 + }, + { + "epoch": 0.34152971703941676, + "grad_norm": 1.0375943311061684, + "learning_rate": 1.9218737053525324e-05, + "loss": 0.36478808522224426, + "step": 691 + }, + { + "epoch": 0.34202397133325096, + "grad_norm": 1.036881907490894, + "learning_rate": 1.9215567950090734e-05, + "loss": 0.39778709411621094, + "step": 692 + }, + { + "epoch": 0.3425182256270851, + "grad_norm": 0.9719804294561131, + "learning_rate": 1.9212392694474654e-05, + "loss": 0.3553788661956787, + "step": 693 + }, + { + "epoch": 0.3430124799209193, + "grad_norm": 1.0265620111261864, + "learning_rate": 1.920921128879684e-05, + "loss": 0.3393115997314453, + "step": 694 + }, + { + "epoch": 0.3435067342147535, + "grad_norm": 1.2003228723584403, + "learning_rate": 1.9206023735181154e-05, + "loss": 0.4240456819534302, + "step": 695 + }, + { + "epoch": 0.3440009885085877, + "grad_norm": 1.0687040296992496, + "learning_rate": 1.920283003575556e-05, + "loss": 0.3451164960861206, + "step": 696 + }, + { + "epoch": 0.34449524280242183, + "grad_norm": 1.0859108204006387, + "learning_rate": 1.919963019265213e-05, + "loss": 0.4328063726425171, + "step": 697 + }, + { + "epoch": 0.34498949709625604, + "grad_norm": 0.9953984300461581, + "learning_rate": 1.9196424208007026e-05, + "loss": 0.35965877771377563, + "step": 698 + }, + { + "epoch": 0.3454837513900902, + "grad_norm": 1.0276560460371096, + "learning_rate": 1.9193212083960522e-05, + "loss": 0.40995267033576965, + "step": 699 + }, + { + "epoch": 0.3459780056839244, + "grad_norm": 1.047717179086883, + "learning_rate": 1.9189993822656984e-05, + "loss": 0.373586505651474, + "step": 700 + }, + { + "epoch": 0.34647225997775855, + "grad_norm": 
0.967832395747722, + "learning_rate": 1.918676942624488e-05, + "loss": 0.3651657998561859, + "step": 701 + }, + { + "epoch": 0.34696651427159275, + "grad_norm": 0.9154206667420104, + "learning_rate": 1.918353889687677e-05, + "loss": 0.3333090543746948, + "step": 702 + }, + { + "epoch": 0.3474607685654269, + "grad_norm": 1.109347895406641, + "learning_rate": 1.9180302236709312e-05, + "loss": 0.444000780582428, + "step": 703 + }, + { + "epoch": 0.3479550228592611, + "grad_norm": 0.9543494832625998, + "learning_rate": 1.917705944790325e-05, + "loss": 0.34942537546157837, + "step": 704 + }, + { + "epoch": 0.34844927715309526, + "grad_norm": 1.206317081042567, + "learning_rate": 1.9173810532623425e-05, + "loss": 0.4709789752960205, + "step": 705 + }, + { + "epoch": 0.34894353144692947, + "grad_norm": 1.0126287373930702, + "learning_rate": 1.917055549303877e-05, + "loss": 0.3615723252296448, + "step": 706 + }, + { + "epoch": 0.3494377857407636, + "grad_norm": 0.98553805717422, + "learning_rate": 1.9167294331322293e-05, + "loss": 0.366035133600235, + "step": 707 + }, + { + "epoch": 0.3499320400345978, + "grad_norm": 1.085095649211616, + "learning_rate": 1.9164027049651105e-05, + "loss": 0.3916548490524292, + "step": 708 + }, + { + "epoch": 0.350426294328432, + "grad_norm": 1.0423550617328055, + "learning_rate": 1.91607536502064e-05, + "loss": 0.3752925992012024, + "step": 709 + }, + { + "epoch": 0.3509205486222662, + "grad_norm": 1.0859051595052658, + "learning_rate": 1.9157474135173448e-05, + "loss": 0.3471261262893677, + "step": 710 + }, + { + "epoch": 0.35141480291610033, + "grad_norm": 1.0011428490015388, + "learning_rate": 1.9154188506741605e-05, + "loss": 0.36898115277290344, + "step": 711 + }, + { + "epoch": 0.3519090572099345, + "grad_norm": 1.0150877470647623, + "learning_rate": 1.9150896767104315e-05, + "loss": 0.38236287236213684, + "step": 712 + }, + { + "epoch": 0.3524033115037687, + "grad_norm": 1.0813644645593066, + "learning_rate": 1.9147598918459096e-05, 
+ "loss": 0.39260241389274597, + "step": 713 + }, + { + "epoch": 0.35289756579760284, + "grad_norm": 0.988095993083205, + "learning_rate": 1.9144294963007542e-05, + "loss": 0.3699083626270294, + "step": 714 + }, + { + "epoch": 0.35339182009143705, + "grad_norm": 0.9649609380548236, + "learning_rate": 1.914098490295532e-05, + "loss": 0.37720543146133423, + "step": 715 + }, + { + "epoch": 0.3538860743852712, + "grad_norm": 0.8834082509396699, + "learning_rate": 1.9137668740512195e-05, + "loss": 0.298441082239151, + "step": 716 + }, + { + "epoch": 0.3543803286791054, + "grad_norm": 0.996165149875045, + "learning_rate": 1.913434647789197e-05, + "loss": 0.3867550194263458, + "step": 717 + }, + { + "epoch": 0.35487458297293956, + "grad_norm": 0.9824732772890364, + "learning_rate": 1.913101811731256e-05, + "loss": 0.37111300230026245, + "step": 718 + }, + { + "epoch": 0.35536883726677376, + "grad_norm": 0.9874274570055057, + "learning_rate": 1.9127683660995916e-05, + "loss": 0.3922812342643738, + "step": 719 + }, + { + "epoch": 0.3558630915606079, + "grad_norm": 1.0744489462576237, + "learning_rate": 1.9124343111168077e-05, + "loss": 0.3878915309906006, + "step": 720 + }, + { + "epoch": 0.3563573458544421, + "grad_norm": 0.9551023310729483, + "learning_rate": 1.9120996470059153e-05, + "loss": 0.34974879026412964, + "step": 721 + }, + { + "epoch": 0.35685160014827627, + "grad_norm": 1.0403250728390605, + "learning_rate": 1.9117643739903306e-05, + "loss": 0.38341426849365234, + "step": 722 + }, + { + "epoch": 0.3573458544421105, + "grad_norm": 0.9876921724558848, + "learning_rate": 1.9114284922938772e-05, + "loss": 0.32610252499580383, + "step": 723 + }, + { + "epoch": 0.35784010873594463, + "grad_norm": 1.0486464385186933, + "learning_rate": 1.9110920021407855e-05, + "loss": 0.37203550338745117, + "step": 724 + }, + { + "epoch": 0.35833436302977884, + "grad_norm": 1.0809240289061282, + "learning_rate": 1.9107549037556906e-05, + "loss": 0.2954786419868469, + "step": 725 + 
}, + { + "epoch": 0.358828617323613, + "grad_norm": 0.9795897601711951, + "learning_rate": 1.9104171973636353e-05, + "loss": 0.33074450492858887, + "step": 726 + }, + { + "epoch": 0.3593228716174472, + "grad_norm": 1.0341587070514209, + "learning_rate": 1.9100788831900676e-05, + "loss": 0.350687712430954, + "step": 727 + }, + { + "epoch": 0.35981712591128134, + "grad_norm": 1.143909518582956, + "learning_rate": 1.9097399614608406e-05, + "loss": 0.3635619878768921, + "step": 728 + }, + { + "epoch": 0.36031138020511555, + "grad_norm": 1.0607740871884148, + "learning_rate": 1.909400432402214e-05, + "loss": 0.36409544944763184, + "step": 729 + }, + { + "epoch": 0.3608056344989497, + "grad_norm": 1.069313873032721, + "learning_rate": 1.9090602962408523e-05, + "loss": 0.4109501540660858, + "step": 730 + }, + { + "epoch": 0.3612998887927839, + "grad_norm": 1.0147750628685799, + "learning_rate": 1.908719553203826e-05, + "loss": 0.337943971157074, + "step": 731 + }, + { + "epoch": 0.36179414308661806, + "grad_norm": 1.0957860180414656, + "learning_rate": 1.9083782035186097e-05, + "loss": 0.36411553621292114, + "step": 732 + }, + { + "epoch": 0.36228839738045227, + "grad_norm": 1.1570738944902594, + "learning_rate": 1.908036247413084e-05, + "loss": 0.3513786494731903, + "step": 733 + }, + { + "epoch": 0.3627826516742864, + "grad_norm": 1.156885907892102, + "learning_rate": 1.907693685115534e-05, + "loss": 0.4017047584056854, + "step": 734 + }, + { + "epoch": 0.3632769059681206, + "grad_norm": 1.0932284273900412, + "learning_rate": 1.907350516854649e-05, + "loss": 0.3780835270881653, + "step": 735 + }, + { + "epoch": 0.3637711602619548, + "grad_norm": 1.10688269569213, + "learning_rate": 1.9070067428595234e-05, + "loss": 0.35562777519226074, + "step": 736 + }, + { + "epoch": 0.364265414555789, + "grad_norm": 1.0784034928358046, + "learning_rate": 1.9066623633596556e-05, + "loss": 0.34880492091178894, + "step": 737 + }, + { + "epoch": 0.36475966884962313, + "grad_norm": 
1.1213824671894879, + "learning_rate": 1.9063173785849488e-05, + "loss": 0.3798677921295166, + "step": 738 + }, + { + "epoch": 0.3652539231434573, + "grad_norm": 1.0300538330170659, + "learning_rate": 1.9059717887657098e-05, + "loss": 0.371119886636734, + "step": 739 + }, + { + "epoch": 0.3657481774372915, + "grad_norm": 1.075537593372937, + "learning_rate": 1.9056255941326497e-05, + "loss": 0.3845891058444977, + "step": 740 + }, + { + "epoch": 0.36624243173112564, + "grad_norm": 1.0460904589757556, + "learning_rate": 1.9052787949168823e-05, + "loss": 0.34627166390419006, + "step": 741 + }, + { + "epoch": 0.36673668602495985, + "grad_norm": 1.0588032623720978, + "learning_rate": 1.9049313913499266e-05, + "loss": 0.3872081935405731, + "step": 742 + }, + { + "epoch": 0.367230940318794, + "grad_norm": 1.0173727289332204, + "learning_rate": 1.9045833836637038e-05, + "loss": 0.40446269512176514, + "step": 743 + }, + { + "epoch": 0.3677251946126282, + "grad_norm": 0.9672045860873493, + "learning_rate": 1.904234772090539e-05, + "loss": 0.3421085476875305, + "step": 744 + }, + { + "epoch": 0.36821944890646235, + "grad_norm": 0.9886363928023795, + "learning_rate": 1.90388555686316e-05, + "loss": 0.3626730442047119, + "step": 745 + }, + { + "epoch": 0.36871370320029656, + "grad_norm": 0.9308335236520315, + "learning_rate": 1.9035357382146984e-05, + "loss": 0.338506281375885, + "step": 746 + }, + { + "epoch": 0.3692079574941307, + "grad_norm": 1.010277605498289, + "learning_rate": 1.903185316378688e-05, + "loss": 0.3709959089756012, + "step": 747 + }, + { + "epoch": 0.3697022117879649, + "grad_norm": 1.0369282663858728, + "learning_rate": 1.9028342915890655e-05, + "loss": 0.3804059624671936, + "step": 748 + }, + { + "epoch": 0.37019646608179907, + "grad_norm": 1.0305613800678137, + "learning_rate": 1.9024826640801694e-05, + "loss": 0.3416539132595062, + "step": 749 + }, + { + "epoch": 0.3706907203756333, + "grad_norm": 1.0119233680399335, + "learning_rate": 
1.9021304340867418e-05, + "loss": 0.3642072081565857, + "step": 750 + }, + { + "epoch": 0.3711849746694674, + "grad_norm": 0.9749783281253589, + "learning_rate": 1.9017776018439267e-05, + "loss": 0.35957199335098267, + "step": 751 + }, + { + "epoch": 0.37167922896330163, + "grad_norm": 1.1539382067501942, + "learning_rate": 1.9014241675872692e-05, + "loss": 0.38497287034988403, + "step": 752 + }, + { + "epoch": 0.3721734832571358, + "grad_norm": 1.1731793747690833, + "learning_rate": 1.9010701315527173e-05, + "loss": 0.40713614225387573, + "step": 753 + }, + { + "epoch": 0.37266773755097, + "grad_norm": 1.0417857344342851, + "learning_rate": 1.9007154939766196e-05, + "loss": 0.35115551948547363, + "step": 754 + }, + { + "epoch": 0.37316199184480414, + "grad_norm": 0.958988647508799, + "learning_rate": 1.9003602550957284e-05, + "loss": 0.3478096127510071, + "step": 755 + }, + { + "epoch": 0.37365624613863835, + "grad_norm": 1.040896998789985, + "learning_rate": 1.9000044151471956e-05, + "loss": 0.36460641026496887, + "step": 756 + }, + { + "epoch": 0.3741505004324725, + "grad_norm": 1.1161707385765272, + "learning_rate": 1.8996479743685745e-05, + "loss": 0.38015758991241455, + "step": 757 + }, + { + "epoch": 0.3746447547263067, + "grad_norm": 1.1039269634713542, + "learning_rate": 1.8992909329978202e-05, + "loss": 0.35270214080810547, + "step": 758 + }, + { + "epoch": 0.37513900902014086, + "grad_norm": 1.0025131869881447, + "learning_rate": 1.8989332912732884e-05, + "loss": 0.3875473439693451, + "step": 759 + }, + { + "epoch": 0.37563326331397506, + "grad_norm": 1.0209812095079043, + "learning_rate": 1.8985750494337353e-05, + "loss": 0.3281819820404053, + "step": 760 + }, + { + "epoch": 0.3761275176078092, + "grad_norm": 1.2490133288735825, + "learning_rate": 1.8982162077183182e-05, + "loss": 0.4081311821937561, + "step": 761 + }, + { + "epoch": 0.3766217719016434, + "grad_norm": 1.2134865751354402, + "learning_rate": 1.897856766366595e-05, + "loss": 
0.3546852469444275, + "step": 762 + }, + { + "epoch": 0.37711602619547757, + "grad_norm": 0.9620958606777789, + "learning_rate": 1.8974967256185234e-05, + "loss": 0.3177235424518585, + "step": 763 + }, + { + "epoch": 0.3776102804893118, + "grad_norm": 1.0401218813843935, + "learning_rate": 1.8971360857144616e-05, + "loss": 0.3739625811576843, + "step": 764 + }, + { + "epoch": 0.37810453478314593, + "grad_norm": 0.9714277368627854, + "learning_rate": 1.8967748468951673e-05, + "loss": 0.32039010524749756, + "step": 765 + }, + { + "epoch": 0.3785987890769801, + "grad_norm": 1.0178844258047104, + "learning_rate": 1.8964130094017986e-05, + "loss": 0.3237234354019165, + "step": 766 + }, + { + "epoch": 0.3790930433708143, + "grad_norm": 1.0589536664735313, + "learning_rate": 1.896050573475913e-05, + "loss": 0.33864307403564453, + "step": 767 + }, + { + "epoch": 0.37958729766464844, + "grad_norm": 1.076259010215984, + "learning_rate": 1.8956875393594675e-05, + "loss": 0.40412086248397827, + "step": 768 + }, + { + "epoch": 0.38008155195848264, + "grad_norm": 1.049114130745209, + "learning_rate": 1.8953239072948185e-05, + "loss": 0.37689530849456787, + "step": 769 + }, + { + "epoch": 0.3805758062523168, + "grad_norm": 1.1429748380406861, + "learning_rate": 1.8949596775247215e-05, + "loss": 0.3632664680480957, + "step": 770 + }, + { + "epoch": 0.381070060546151, + "grad_norm": 1.0707340379824546, + "learning_rate": 1.8945948502923314e-05, + "loss": 0.384027361869812, + "step": 771 + }, + { + "epoch": 0.38156431483998515, + "grad_norm": 1.0884709757767692, + "learning_rate": 1.8942294258412012e-05, + "loss": 0.37623292207717896, + "step": 772 + }, + { + "epoch": 0.38205856913381936, + "grad_norm": 0.9918916696644151, + "learning_rate": 1.8938634044152837e-05, + "loss": 0.3449557423591614, + "step": 773 + }, + { + "epoch": 0.3825528234276535, + "grad_norm": 1.0216495444427651, + "learning_rate": 1.8934967862589287e-05, + "loss": 0.37977170944213867, + "step": 774 + }, + { + 
"epoch": 0.3830470777214877, + "grad_norm": 1.035626875821766, + "learning_rate": 1.893129571616886e-05, + "loss": 0.3535463809967041, + "step": 775 + }, + { + "epoch": 0.38354133201532187, + "grad_norm": 0.9784961361645077, + "learning_rate": 1.8927617607343024e-05, + "loss": 0.3107556104660034, + "step": 776 + }, + { + "epoch": 0.3840355863091561, + "grad_norm": 0.9647734455274504, + "learning_rate": 1.8923933538567238e-05, + "loss": 0.33028605580329895, + "step": 777 + }, + { + "epoch": 0.3845298406029902, + "grad_norm": 1.0880250729774004, + "learning_rate": 1.8920243512300925e-05, + "loss": 0.35947421193122864, + "step": 778 + }, + { + "epoch": 0.38502409489682443, + "grad_norm": 1.1225656593555045, + "learning_rate": 1.89165475310075e-05, + "loss": 0.36262935400009155, + "step": 779 + }, + { + "epoch": 0.3855183491906586, + "grad_norm": 0.9595574558826961, + "learning_rate": 1.8912845597154344e-05, + "loss": 0.3441828489303589, + "step": 780 + }, + { + "epoch": 0.3860126034844928, + "grad_norm": 1.1060761912194574, + "learning_rate": 1.8909137713212813e-05, + "loss": 0.3748928904533386, + "step": 781 + }, + { + "epoch": 0.38650685777832694, + "grad_norm": 1.0401989681427097, + "learning_rate": 1.8905423881658248e-05, + "loss": 0.3571966588497162, + "step": 782 + }, + { + "epoch": 0.38700111207216115, + "grad_norm": 1.0661600684644588, + "learning_rate": 1.8901704104969937e-05, + "loss": 0.3937920331954956, + "step": 783 + }, + { + "epoch": 0.3874953663659953, + "grad_norm": 1.036207969764135, + "learning_rate": 1.8897978385631157e-05, + "loss": 0.3641708493232727, + "step": 784 + }, + { + "epoch": 0.3879896206598295, + "grad_norm": 1.0259735566777997, + "learning_rate": 1.8894246726129143e-05, + "loss": 0.33510833978652954, + "step": 785 + }, + { + "epoch": 0.38848387495366365, + "grad_norm": 1.0496886995032506, + "learning_rate": 1.88905091289551e-05, + "loss": 0.3553236722946167, + "step": 786 + }, + { + "epoch": 0.38897812924749786, + "grad_norm": 
1.1065055000350301, + "learning_rate": 1.8886765596604188e-05, + "loss": 0.3802195191383362, + "step": 787 + }, + { + "epoch": 0.389472383541332, + "grad_norm": 1.0233155379560877, + "learning_rate": 1.8883016131575546e-05, + "loss": 0.3672805726528168, + "step": 788 + }, + { + "epoch": 0.3899666378351662, + "grad_norm": 1.1021600101810725, + "learning_rate": 1.887926073637225e-05, + "loss": 0.35715609788894653, + "step": 789 + }, + { + "epoch": 0.39046089212900037, + "grad_norm": 1.0669470229074853, + "learning_rate": 1.8875499413501362e-05, + "loss": 0.3800659775733948, + "step": 790 + }, + { + "epoch": 0.3909551464228345, + "grad_norm": 1.0110531011706714, + "learning_rate": 1.8871732165473878e-05, + "loss": 0.36886462569236755, + "step": 791 + }, + { + "epoch": 0.3914494007166687, + "grad_norm": 1.1716485087298352, + "learning_rate": 1.886795899480476e-05, + "loss": 0.37373536825180054, + "step": 792 + }, + { + "epoch": 0.3919436550105029, + "grad_norm": 1.1804493539453536, + "learning_rate": 1.8864179904012932e-05, + "loss": 0.4016551375389099, + "step": 793 + }, + { + "epoch": 0.3924379093043371, + "grad_norm": 1.3227573763511704, + "learning_rate": 1.886039489562125e-05, + "loss": 0.35107535123825073, + "step": 794 + }, + { + "epoch": 0.39293216359817124, + "grad_norm": 1.0690764214154878, + "learning_rate": 1.8856603972156532e-05, + "loss": 0.36280331015586853, + "step": 795 + }, + { + "epoch": 0.39342641789200544, + "grad_norm": 1.0372650355149657, + "learning_rate": 1.885280713614955e-05, + "loss": 0.3417884111404419, + "step": 796 + }, + { + "epoch": 0.3939206721858396, + "grad_norm": 1.0135638633522712, + "learning_rate": 1.8849004390135017e-05, + "loss": 0.3257544934749603, + "step": 797 + }, + { + "epoch": 0.3944149264796738, + "grad_norm": 1.138312578356034, + "learning_rate": 1.8845195736651588e-05, + "loss": 0.3694860339164734, + "step": 798 + }, + { + "epoch": 0.39490918077350795, + "grad_norm": 1.0432466517484986, + "learning_rate": 
1.8841381178241865e-05, + "loss": 0.37279266119003296, + "step": 799 + }, + { + "epoch": 0.39540343506734216, + "grad_norm": 1.023281980764518, + "learning_rate": 1.88375607174524e-05, + "loss": 0.38758352398872375, + "step": 800 + }, + { + "epoch": 0.3958976893611763, + "grad_norm": 1.0321652923702807, + "learning_rate": 1.883373435683367e-05, + "loss": 0.34098950028419495, + "step": 801 + }, + { + "epoch": 0.3963919436550105, + "grad_norm": 1.0256865325574602, + "learning_rate": 1.8829902098940105e-05, + "loss": 0.3278653621673584, + "step": 802 + }, + { + "epoch": 0.39688619794884467, + "grad_norm": 1.1042531688452888, + "learning_rate": 1.8826063946330065e-05, + "loss": 0.3673133850097656, + "step": 803 + }, + { + "epoch": 0.39738045224267887, + "grad_norm": 0.9510108180701087, + "learning_rate": 1.882221990156584e-05, + "loss": 0.37917453050613403, + "step": 804 + }, + { + "epoch": 0.397874706536513, + "grad_norm": 0.9926574292369763, + "learning_rate": 1.8818369967213662e-05, + "loss": 0.33986327052116394, + "step": 805 + }, + { + "epoch": 0.39836896083034723, + "grad_norm": 1.0256369099360807, + "learning_rate": 1.8814514145843694e-05, + "loss": 0.34402647614479065, + "step": 806 + }, + { + "epoch": 0.3988632151241814, + "grad_norm": 1.0984836868071073, + "learning_rate": 1.8810652440030026e-05, + "loss": 0.32781803607940674, + "step": 807 + }, + { + "epoch": 0.3993574694180156, + "grad_norm": 1.063630501097469, + "learning_rate": 1.8806784852350678e-05, + "loss": 0.35807961225509644, + "step": 808 + }, + { + "epoch": 0.39985172371184974, + "grad_norm": 1.0130076092125457, + "learning_rate": 1.8802911385387596e-05, + "loss": 0.33577096462249756, + "step": 809 + }, + { + "epoch": 0.40034597800568394, + "grad_norm": 1.205635135602797, + "learning_rate": 1.8799032041726654e-05, + "loss": 0.37786391377449036, + "step": 810 + }, + { + "epoch": 0.4008402322995181, + "grad_norm": 1.0055899694647235, + "learning_rate": 1.879514682395764e-05, + "loss": 
0.3237725496292114, + "step": 811 + }, + { + "epoch": 0.4013344865933523, + "grad_norm": 1.0557641796624602, + "learning_rate": 1.8791255734674275e-05, + "loss": 0.29552844166755676, + "step": 812 + }, + { + "epoch": 0.40182874088718645, + "grad_norm": 1.0675222800328668, + "learning_rate": 1.8787358776474192e-05, + "loss": 0.40317612886428833, + "step": 813 + }, + { + "epoch": 0.40232299518102066, + "grad_norm": 1.0548023053217102, + "learning_rate": 1.8783455951958948e-05, + "loss": 0.33383694291114807, + "step": 814 + }, + { + "epoch": 0.4028172494748548, + "grad_norm": 1.0255061005640398, + "learning_rate": 1.8779547263734012e-05, + "loss": 0.35020262002944946, + "step": 815 + }, + { + "epoch": 0.403311503768689, + "grad_norm": 1.098709822155027, + "learning_rate": 1.8775632714408765e-05, + "loss": 0.3742774724960327, + "step": 816 + }, + { + "epoch": 0.40380575806252317, + "grad_norm": 0.9986084839363315, + "learning_rate": 1.8771712306596506e-05, + "loss": 0.35037580132484436, + "step": 817 + }, + { + "epoch": 0.4043000123563573, + "grad_norm": 1.078218018297503, + "learning_rate": 1.8767786042914445e-05, + "loss": 0.3416820168495178, + "step": 818 + }, + { + "epoch": 0.4047942666501915, + "grad_norm": 1.0398523365943921, + "learning_rate": 1.8763853925983695e-05, + "loss": 0.33287927508354187, + "step": 819 + }, + { + "epoch": 0.4052885209440257, + "grad_norm": 1.031774367057856, + "learning_rate": 1.875991595842929e-05, + "loss": 0.3493141531944275, + "step": 820 + }, + { + "epoch": 0.4057827752378599, + "grad_norm": 1.1647269737420223, + "learning_rate": 1.875597214288015e-05, + "loss": 0.4184780418872833, + "step": 821 + }, + { + "epoch": 0.40627702953169403, + "grad_norm": 1.0098974718957208, + "learning_rate": 1.8752022481969116e-05, + "loss": 0.33189794421195984, + "step": 822 + }, + { + "epoch": 0.40677128382552824, + "grad_norm": 1.1012026040533913, + "learning_rate": 1.8748066978332925e-05, + "loss": 0.35339856147766113, + "step": 823 + }, + { + 
"epoch": 0.4072655381193624, + "grad_norm": 0.990995886573267, + "learning_rate": 1.874410563461221e-05, + "loss": 0.3766328692436218, + "step": 824 + }, + { + "epoch": 0.4077597924131966, + "grad_norm": 1.023451056136873, + "learning_rate": 1.874013845345152e-05, + "loss": 0.32575076818466187, + "step": 825 + }, + { + "epoch": 0.40825404670703075, + "grad_norm": 0.9933822197860499, + "learning_rate": 1.8736165437499273e-05, + "loss": 0.3417864441871643, + "step": 826 + }, + { + "epoch": 0.40874830100086496, + "grad_norm": 1.053854919420327, + "learning_rate": 1.8732186589407807e-05, + "loss": 0.3636544942855835, + "step": 827 + }, + { + "epoch": 0.4092425552946991, + "grad_norm": 1.0398605740994966, + "learning_rate": 1.872820191183334e-05, + "loss": 0.38730406761169434, + "step": 828 + }, + { + "epoch": 0.4097368095885333, + "grad_norm": 1.031894160648423, + "learning_rate": 1.872421140743599e-05, + "loss": 0.3593043088912964, + "step": 829 + }, + { + "epoch": 0.41023106388236746, + "grad_norm": 1.046860972263581, + "learning_rate": 1.872021507887976e-05, + "loss": 0.39092978835105896, + "step": 830 + }, + { + "epoch": 0.41072531817620167, + "grad_norm": 1.1607362555786684, + "learning_rate": 1.8716212928832537e-05, + "loss": 0.3745616674423218, + "step": 831 + }, + { + "epoch": 0.4112195724700358, + "grad_norm": 1.1451994826740608, + "learning_rate": 1.87122049599661e-05, + "loss": 0.39571845531463623, + "step": 832 + }, + { + "epoch": 0.41171382676387, + "grad_norm": 1.0987542615004384, + "learning_rate": 1.8708191174956116e-05, + "loss": 0.35459476709365845, + "step": 833 + }, + { + "epoch": 0.4122080810577042, + "grad_norm": 1.1159636372579822, + "learning_rate": 1.870417157648213e-05, + "loss": 0.38937896490097046, + "step": 834 + }, + { + "epoch": 0.4127023353515384, + "grad_norm": 1.002441779942121, + "learning_rate": 1.8700146167227563e-05, + "loss": 0.33595120906829834, + "step": 835 + }, + { + "epoch": 0.41319658964537254, + "grad_norm": 
0.9899088387295479, + "learning_rate": 1.869611494987973e-05, + "loss": 0.332889199256897, + "step": 836 + }, + { + "epoch": 0.41369084393920674, + "grad_norm": 1.0005984941908395, + "learning_rate": 1.8692077927129803e-05, + "loss": 0.333438515663147, + "step": 837 + }, + { + "epoch": 0.4141850982330409, + "grad_norm": 0.9672990037342486, + "learning_rate": 1.868803510167285e-05, + "loss": 0.30645743012428284, + "step": 838 + }, + { + "epoch": 0.4146793525268751, + "grad_norm": 1.0166404987540014, + "learning_rate": 1.86839864762078e-05, + "loss": 0.3333967924118042, + "step": 839 + }, + { + "epoch": 0.41517360682070925, + "grad_norm": 1.1324675944020866, + "learning_rate": 1.867993205343746e-05, + "loss": 0.36230576038360596, + "step": 840 + }, + { + "epoch": 0.41566786111454346, + "grad_norm": 1.4565152055506116, + "learning_rate": 1.8675871836068498e-05, + "loss": 0.34191709756851196, + "step": 841 + }, + { + "epoch": 0.4161621154083776, + "grad_norm": 1.1876819294674656, + "learning_rate": 1.8671805826811462e-05, + "loss": 0.3115188479423523, + "step": 842 + }, + { + "epoch": 0.4166563697022118, + "grad_norm": 1.023080563524472, + "learning_rate": 1.866773402838076e-05, + "loss": 0.3725768029689789, + "step": 843 + }, + { + "epoch": 0.41715062399604597, + "grad_norm": 1.1051799194693688, + "learning_rate": 1.8663656443494673e-05, + "loss": 0.376983642578125, + "step": 844 + }, + { + "epoch": 0.4176448782898801, + "grad_norm": 1.0101343157113072, + "learning_rate": 1.8659573074875327e-05, + "loss": 0.31490784883499146, + "step": 845 + }, + { + "epoch": 0.4181391325837143, + "grad_norm": 1.0250002510666845, + "learning_rate": 1.8655483925248727e-05, + "loss": 0.3533504605293274, + "step": 846 + }, + { + "epoch": 0.4186333868775485, + "grad_norm": 1.090746715781531, + "learning_rate": 1.8651388997344734e-05, + "loss": 0.3282274305820465, + "step": 847 + }, + { + "epoch": 0.4191276411713827, + "grad_norm": 1.1145704933282803, + "learning_rate": 
1.8647288293897055e-05, + "loss": 0.32892414927482605, + "step": 848 + }, + { + "epoch": 0.41962189546521683, + "grad_norm": 1.1451436882679205, + "learning_rate": 1.864318181764327e-05, + "loss": 0.40414246916770935, + "step": 849 + }, + { + "epoch": 0.42011614975905104, + "grad_norm": 0.9874933781402742, + "learning_rate": 1.8639069571324798e-05, + "loss": 0.30335378646850586, + "step": 850 + }, + { + "epoch": 0.4206104040528852, + "grad_norm": 1.0390790492756226, + "learning_rate": 1.863495155768692e-05, + "loss": 0.311710000038147, + "step": 851 + }, + { + "epoch": 0.4211046583467194, + "grad_norm": 1.1685121542837038, + "learning_rate": 1.8630827779478755e-05, + "loss": 0.37345218658447266, + "step": 852 + }, + { + "epoch": 0.42159891264055355, + "grad_norm": 1.118375459884757, + "learning_rate": 1.8626698239453287e-05, + "loss": 0.37286317348480225, + "step": 853 + }, + { + "epoch": 0.42209316693438775, + "grad_norm": 1.061435107804804, + "learning_rate": 1.8622562940367335e-05, + "loss": 0.3706691861152649, + "step": 854 + }, + { + "epoch": 0.4225874212282219, + "grad_norm": 1.045639661440086, + "learning_rate": 1.8618421884981567e-05, + "loss": 0.30183354020118713, + "step": 855 + }, + { + "epoch": 0.4230816755220561, + "grad_norm": 0.9282918926966607, + "learning_rate": 1.8614275076060486e-05, + "loss": 0.32329827547073364, + "step": 856 + }, + { + "epoch": 0.42357592981589026, + "grad_norm": 0.9823332197669685, + "learning_rate": 1.861012251637245e-05, + "loss": 0.39380010962486267, + "step": 857 + }, + { + "epoch": 0.42407018410972447, + "grad_norm": 1.2258684110272524, + "learning_rate": 1.8605964208689646e-05, + "loss": 0.41745316982269287, + "step": 858 + }, + { + "epoch": 0.4245644384035586, + "grad_norm": 1.0539643629085786, + "learning_rate": 1.86018001557881e-05, + "loss": 0.36751389503479004, + "step": 859 + }, + { + "epoch": 0.4250586926973928, + "grad_norm": 1.052378043397748, + "learning_rate": 1.8597630360447673e-05, + "loss": 
0.36876100301742554, + "step": 860 + }, + { + "epoch": 0.425552946991227, + "grad_norm": 1.0649813734142937, + "learning_rate": 1.8593454825452067e-05, + "loss": 0.3473365306854248, + "step": 861 + }, + { + "epoch": 0.4260472012850612, + "grad_norm": 1.0186749062796028, + "learning_rate": 1.8589273553588802e-05, + "loss": 0.3429828882217407, + "step": 862 + }, + { + "epoch": 0.42654145557889533, + "grad_norm": 0.9471164855143414, + "learning_rate": 1.8585086547649238e-05, + "loss": 0.3424219787120819, + "step": 863 + }, + { + "epoch": 0.42703570987272954, + "grad_norm": 1.002345729786534, + "learning_rate": 1.8580893810428562e-05, + "loss": 0.32187891006469727, + "step": 864 + }, + { + "epoch": 0.4275299641665637, + "grad_norm": 0.997893238522563, + "learning_rate": 1.8576695344725785e-05, + "loss": 0.3116072416305542, + "step": 865 + }, + { + "epoch": 0.4280242184603979, + "grad_norm": 0.9198063604105835, + "learning_rate": 1.8572491153343742e-05, + "loss": 0.32645124197006226, + "step": 866 + }, + { + "epoch": 0.42851847275423205, + "grad_norm": 1.0827892730720303, + "learning_rate": 1.8568281239089088e-05, + "loss": 0.36861616373062134, + "step": 867 + }, + { + "epoch": 0.42901272704806626, + "grad_norm": 1.05561333743087, + "learning_rate": 1.8564065604772307e-05, + "loss": 0.38477885723114014, + "step": 868 + }, + { + "epoch": 0.4295069813419004, + "grad_norm": 1.1711610330815532, + "learning_rate": 1.8559844253207694e-05, + "loss": 0.352588951587677, + "step": 869 + }, + { + "epoch": 0.43000123563573456, + "grad_norm": 1.1459489566657088, + "learning_rate": 1.8555617187213362e-05, + "loss": 0.43443864583969116, + "step": 870 + }, + { + "epoch": 0.43049548992956876, + "grad_norm": 1.1608032541581428, + "learning_rate": 1.8551384409611238e-05, + "loss": 0.37355685234069824, + "step": 871 + }, + { + "epoch": 0.4309897442234029, + "grad_norm": 1.120838755410591, + "learning_rate": 1.854714592322707e-05, + "loss": 0.3529026508331299, + "step": 872 + }, + { + 
"epoch": 0.4314839985172371, + "grad_norm": 1.031744932760461, + "learning_rate": 1.854290173089041e-05, + "loss": 0.3278823494911194, + "step": 873 + }, + { + "epoch": 0.4319782528110713, + "grad_norm": 1.045846838310407, + "learning_rate": 1.8538651835434615e-05, + "loss": 0.3677588999271393, + "step": 874 + }, + { + "epoch": 0.4324725071049055, + "grad_norm": 0.9726822011565114, + "learning_rate": 1.8534396239696852e-05, + "loss": 0.34132176637649536, + "step": 875 + }, + { + "epoch": 0.43296676139873963, + "grad_norm": 0.967842291132869, + "learning_rate": 1.8530134946518106e-05, + "loss": 0.3329963684082031, + "step": 876 + }, + { + "epoch": 0.43346101569257384, + "grad_norm": 1.1447169522915757, + "learning_rate": 1.852586795874315e-05, + "loss": 0.38435080647468567, + "step": 877 + }, + { + "epoch": 0.433955269986408, + "grad_norm": 1.076068410050275, + "learning_rate": 1.8521595279220564e-05, + "loss": 0.3737541735172272, + "step": 878 + }, + { + "epoch": 0.4344495242802422, + "grad_norm": 1.0947429210573731, + "learning_rate": 1.851731691080273e-05, + "loss": 0.3676382303237915, + "step": 879 + }, + { + "epoch": 0.43494377857407635, + "grad_norm": 0.9624268111771948, + "learning_rate": 1.8513032856345825e-05, + "loss": 0.317960262298584, + "step": 880 + }, + { + "epoch": 0.43543803286791055, + "grad_norm": 1.040958800557315, + "learning_rate": 1.8508743118709816e-05, + "loss": 0.38857966661453247, + "step": 881 + }, + { + "epoch": 0.4359322871617447, + "grad_norm": 1.0694529449199925, + "learning_rate": 1.8504447700758482e-05, + "loss": 0.33234506845474243, + "step": 882 + }, + { + "epoch": 0.4364265414555789, + "grad_norm": 1.0262098516685678, + "learning_rate": 1.8500146605359375e-05, + "loss": 0.3380611538887024, + "step": 883 + }, + { + "epoch": 0.43692079574941306, + "grad_norm": 1.032922511494617, + "learning_rate": 1.8495839835383845e-05, + "loss": 0.36386823654174805, + "step": 884 + }, + { + "epoch": 0.43741505004324727, + "grad_norm": 
1.0814661245803954, + "learning_rate": 1.849152739370703e-05, + "loss": 0.34711897373199463, + "step": 885 + }, + { + "epoch": 0.4379093043370814, + "grad_norm": 1.1112439466083954, + "learning_rate": 1.848720928320786e-05, + "loss": 0.3861457109451294, + "step": 886 + }, + { + "epoch": 0.4384035586309156, + "grad_norm": 1.0062524071684966, + "learning_rate": 1.848288550676904e-05, + "loss": 0.3387115001678467, + "step": 887 + }, + { + "epoch": 0.4388978129247498, + "grad_norm": 1.119801920916648, + "learning_rate": 1.847855606727706e-05, + "loss": 0.3419748842716217, + "step": 888 + }, + { + "epoch": 0.439392067218584, + "grad_norm": 1.1162084355940824, + "learning_rate": 1.847422096762219e-05, + "loss": 0.38184499740600586, + "step": 889 + }, + { + "epoch": 0.43988632151241813, + "grad_norm": 1.1974191241625343, + "learning_rate": 1.846988021069849e-05, + "loss": 0.3845345973968506, + "step": 890 + }, + { + "epoch": 0.44038057580625234, + "grad_norm": 1.035257767207683, + "learning_rate": 1.8465533799403778e-05, + "loss": 0.31854647397994995, + "step": 891 + }, + { + "epoch": 0.4408748301000865, + "grad_norm": 1.2150547461116588, + "learning_rate": 1.8461181736639658e-05, + "loss": 0.3940027356147766, + "step": 892 + }, + { + "epoch": 0.4413690843939207, + "grad_norm": 1.0827124100419134, + "learning_rate": 1.8456824025311508e-05, + "loss": 0.3580612540245056, + "step": 893 + }, + { + "epoch": 0.44186333868775485, + "grad_norm": 1.0457692243819372, + "learning_rate": 1.8452460668328474e-05, + "loss": 0.3662642240524292, + "step": 894 + }, + { + "epoch": 0.44235759298158905, + "grad_norm": 1.3135451040729966, + "learning_rate": 1.8448091668603464e-05, + "loss": 0.29031360149383545, + "step": 895 + }, + { + "epoch": 0.4428518472754232, + "grad_norm": 1.2267380523250877, + "learning_rate": 1.844371702905317e-05, + "loss": 0.36141306161880493, + "step": 896 + }, + { + "epoch": 0.44334610156925736, + "grad_norm": 0.9926258795727512, + "learning_rate": 
1.8439336752598027e-05, + "loss": 0.35286253690719604, + "step": 897 + }, + { + "epoch": 0.44384035586309156, + "grad_norm": 1.0509214985554662, + "learning_rate": 1.8434950842162256e-05, + "loss": 0.38967087864875793, + "step": 898 + }, + { + "epoch": 0.4443346101569257, + "grad_norm": 1.1041873655686079, + "learning_rate": 1.8430559300673824e-05, + "loss": 0.4260423183441162, + "step": 899 + }, + { + "epoch": 0.4448288644507599, + "grad_norm": 1.0004221402171782, + "learning_rate": 1.8426162131064456e-05, + "loss": 0.35336780548095703, + "step": 900 + }, + { + "epoch": 0.44532311874459407, + "grad_norm": 1.0124996907215051, + "learning_rate": 1.842175933626965e-05, + "loss": 0.32953035831451416, + "step": 901 + }, + { + "epoch": 0.4458173730384283, + "grad_norm": 1.1481125848953921, + "learning_rate": 1.841735091922864e-05, + "loss": 0.3495085537433624, + "step": 902 + }, + { + "epoch": 0.44631162733226243, + "grad_norm": 1.0556558347257945, + "learning_rate": 1.8412936882884426e-05, + "loss": 0.3774382174015045, + "step": 903 + }, + { + "epoch": 0.44680588162609663, + "grad_norm": 1.1488659780400408, + "learning_rate": 1.8408517230183756e-05, + "loss": 0.397183358669281, + "step": 904 + }, + { + "epoch": 0.4473001359199308, + "grad_norm": 1.1226988100601583, + "learning_rate": 1.840409196407713e-05, + "loss": 0.4004632234573364, + "step": 905 + }, + { + "epoch": 0.447794390213765, + "grad_norm": 0.9888048683742604, + "learning_rate": 1.8399661087518784e-05, + "loss": 0.3464478850364685, + "step": 906 + }, + { + "epoch": 0.44828864450759914, + "grad_norm": 1.0618254470638813, + "learning_rate": 1.839522460346671e-05, + "loss": 0.38161879777908325, + "step": 907 + }, + { + "epoch": 0.44878289880143335, + "grad_norm": 1.0021571541379897, + "learning_rate": 1.839078251488265e-05, + "loss": 0.3307412266731262, + "step": 908 + }, + { + "epoch": 0.4492771530952675, + "grad_norm": 1.0558486391083746, + "learning_rate": 1.838633482473207e-05, + "loss": 
0.3238945007324219, + "step": 909 + }, + { + "epoch": 0.4497714073891017, + "grad_norm": 1.1763396472681338, + "learning_rate": 1.8381881535984186e-05, + "loss": 0.37863802909851074, + "step": 910 + }, + { + "epoch": 0.45026566168293586, + "grad_norm": 1.187536001798055, + "learning_rate": 1.8377422651611955e-05, + "loss": 0.35920199751853943, + "step": 911 + }, + { + "epoch": 0.45075991597677006, + "grad_norm": 1.1108046485108733, + "learning_rate": 1.8372958174592054e-05, + "loss": 0.3913283050060272, + "step": 912 + }, + { + "epoch": 0.4512541702706042, + "grad_norm": 1.029447767687351, + "learning_rate": 1.8368488107904916e-05, + "loss": 0.32950836420059204, + "step": 913 + }, + { + "epoch": 0.4517484245644384, + "grad_norm": 0.9275296283957708, + "learning_rate": 1.8364012454534687e-05, + "loss": 0.30557066202163696, + "step": 914 + }, + { + "epoch": 0.4522426788582726, + "grad_norm": 1.0685283966213752, + "learning_rate": 1.835953121746925e-05, + "loss": 0.3280435800552368, + "step": 915 + }, + { + "epoch": 0.4527369331521068, + "grad_norm": 1.0053118292301932, + "learning_rate": 1.835504439970021e-05, + "loss": 0.323611319065094, + "step": 916 + }, + { + "epoch": 0.45323118744594093, + "grad_norm": 1.086332749113099, + "learning_rate": 1.835055200422292e-05, + "loss": 0.3794775605201721, + "step": 917 + }, + { + "epoch": 0.45372544173977514, + "grad_norm": 1.1746257984153148, + "learning_rate": 1.8346054034036418e-05, + "loss": 0.3437816798686981, + "step": 918 + }, + { + "epoch": 0.4542196960336093, + "grad_norm": 1.175593282348777, + "learning_rate": 1.8341550492143497e-05, + "loss": 0.40312957763671875, + "step": 919 + }, + { + "epoch": 0.4547139503274435, + "grad_norm": 1.0344840643948632, + "learning_rate": 1.833704138155065e-05, + "loss": 0.33988016843795776, + "step": 920 + }, + { + "epoch": 0.45520820462127765, + "grad_norm": 1.099362227926189, + "learning_rate": 1.83325267052681e-05, + "loss": 0.30893969535827637, + "step": 921 + }, + { + "epoch": 
0.45570245891511185, + "grad_norm": 1.1279932203915406, + "learning_rate": 1.832800646630978e-05, + "loss": 0.3351095914840698, + "step": 922 + }, + { + "epoch": 0.456196713208946, + "grad_norm": 1.0211776718159757, + "learning_rate": 1.8323480667693335e-05, + "loss": 0.3235122561454773, + "step": 923 + }, + { + "epoch": 0.45669096750278015, + "grad_norm": 1.0274671423740642, + "learning_rate": 1.8318949312440126e-05, + "loss": 0.3482256531715393, + "step": 924 + }, + { + "epoch": 0.45718522179661436, + "grad_norm": 1.0223238909560575, + "learning_rate": 1.831441240357522e-05, + "loss": 0.3577580451965332, + "step": 925 + }, + { + "epoch": 0.4576794760904485, + "grad_norm": 1.100617534966992, + "learning_rate": 1.8309869944127386e-05, + "loss": 0.34081172943115234, + "step": 926 + }, + { + "epoch": 0.4581737303842827, + "grad_norm": 1.1911908757683491, + "learning_rate": 1.8305321937129118e-05, + "loss": 0.4041389524936676, + "step": 927 + }, + { + "epoch": 0.45866798467811687, + "grad_norm": 0.9300326755373893, + "learning_rate": 1.830076838561659e-05, + "loss": 0.3014240562915802, + "step": 928 + }, + { + "epoch": 0.4591622389719511, + "grad_norm": 1.0061666296037273, + "learning_rate": 1.829620929262969e-05, + "loss": 0.3105698823928833, + "step": 929 + }, + { + "epoch": 0.4596564932657852, + "grad_norm": 1.035696211609358, + "learning_rate": 1.8291644661212008e-05, + "loss": 0.36114832758903503, + "step": 930 + }, + { + "epoch": 0.46015074755961943, + "grad_norm": 1.0621844186259055, + "learning_rate": 1.828707449441082e-05, + "loss": 0.33738240599632263, + "step": 931 + }, + { + "epoch": 0.4606450018534536, + "grad_norm": 1.0507412286541111, + "learning_rate": 1.8282498795277108e-05, + "loss": 0.3455100655555725, + "step": 932 + }, + { + "epoch": 0.4611392561472878, + "grad_norm": 1.0635377650103532, + "learning_rate": 1.8277917566865544e-05, + "loss": 0.3622395992279053, + "step": 933 + }, + { + "epoch": 0.46163351044112194, + "grad_norm": 1.1698746861585616, 
+ "learning_rate": 1.8273330812234488e-05, + "loss": 0.36942192912101746, + "step": 934 + }, + { + "epoch": 0.46212776473495615, + "grad_norm": 1.1083328377879573, + "learning_rate": 1.8268738534445996e-05, + "loss": 0.33603039383888245, + "step": 935 + }, + { + "epoch": 0.4626220190287903, + "grad_norm": 1.0473328437100615, + "learning_rate": 1.82641407365658e-05, + "loss": 0.34806567430496216, + "step": 936 + }, + { + "epoch": 0.4631162733226245, + "grad_norm": 1.0559884618945852, + "learning_rate": 1.8259537421663333e-05, + "loss": 0.35512328147888184, + "step": 937 + }, + { + "epoch": 0.46361052761645866, + "grad_norm": 1.0108795008514326, + "learning_rate": 1.8254928592811695e-05, + "loss": 0.33349719643592834, + "step": 938 + }, + { + "epoch": 0.46410478191029286, + "grad_norm": 1.2122442261111321, + "learning_rate": 1.8250314253087677e-05, + "loss": 0.3510274887084961, + "step": 939 + }, + { + "epoch": 0.464599036204127, + "grad_norm": 1.2184941603930532, + "learning_rate": 1.824569440557175e-05, + "loss": 0.35831883549690247, + "step": 940 + }, + { + "epoch": 0.4650932904979612, + "grad_norm": 1.1635496425287044, + "learning_rate": 1.824106905334805e-05, + "loss": 0.353208065032959, + "step": 941 + }, + { + "epoch": 0.46558754479179537, + "grad_norm": 1.1400926219916139, + "learning_rate": 1.8236438199504402e-05, + "loss": 0.3335849642753601, + "step": 942 + }, + { + "epoch": 0.4660817990856296, + "grad_norm": 1.0623049779098108, + "learning_rate": 1.8231801847132294e-05, + "loss": 0.346247136592865, + "step": 943 + }, + { + "epoch": 0.46657605337946373, + "grad_norm": 1.0719060242361118, + "learning_rate": 1.8227159999326895e-05, + "loss": 0.35125380754470825, + "step": 944 + }, + { + "epoch": 0.46707030767329794, + "grad_norm": 1.026675887024196, + "learning_rate": 1.822251265918703e-05, + "loss": 0.34262675046920776, + "step": 945 + }, + { + "epoch": 0.4675645619671321, + "grad_norm": 1.0951735908349534, + "learning_rate": 1.82178598298152e-05, + "loss": 
0.3437168598175049, + "step": 946 + }, + { + "epoch": 0.4680588162609663, + "grad_norm": 1.2204880290084008, + "learning_rate": 1.8213201514317565e-05, + "loss": 0.35729774832725525, + "step": 947 + }, + { + "epoch": 0.46855307055480044, + "grad_norm": 1.1062871199303559, + "learning_rate": 1.8208537715803954e-05, + "loss": 0.36507898569107056, + "step": 948 + }, + { + "epoch": 0.46904732484863465, + "grad_norm": 1.0875432400928187, + "learning_rate": 1.8203868437387847e-05, + "loss": 0.363017737865448, + "step": 949 + }, + { + "epoch": 0.4695415791424688, + "grad_norm": 1.0718622311605446, + "learning_rate": 1.8199193682186388e-05, + "loss": 0.3645821511745453, + "step": 950 + }, + { + "epoch": 0.47003583343630295, + "grad_norm": 1.2195854283374437, + "learning_rate": 1.8194513453320387e-05, + "loss": 0.3054324686527252, + "step": 951 + }, + { + "epoch": 0.47053008773013716, + "grad_norm": 1.0538248118306075, + "learning_rate": 1.8189827753914282e-05, + "loss": 0.35003694891929626, + "step": 952 + }, + { + "epoch": 0.4710243420239713, + "grad_norm": 1.1789267282791076, + "learning_rate": 1.8185136587096193e-05, + "loss": 0.37834814190864563, + "step": 953 + }, + { + "epoch": 0.4715185963178055, + "grad_norm": 1.0741971770420784, + "learning_rate": 1.8180439955997867e-05, + "loss": 0.3369285464286804, + "step": 954 + }, + { + "epoch": 0.47201285061163967, + "grad_norm": 1.010532535770725, + "learning_rate": 1.8175737863754706e-05, + "loss": 0.3612895905971527, + "step": 955 + }, + { + "epoch": 0.4725071049054739, + "grad_norm": 1.057430538694607, + "learning_rate": 1.817103031350577e-05, + "loss": 0.34393271803855896, + "step": 956 + }, + { + "epoch": 0.473001359199308, + "grad_norm": 1.0983705860238564, + "learning_rate": 1.8166317308393745e-05, + "loss": 0.3824620544910431, + "step": 957 + }, + { + "epoch": 0.47349561349314223, + "grad_norm": 1.0093831974265368, + "learning_rate": 1.816159885156497e-05, + "loss": 0.3092145621776581, + "step": 958 + }, + { + 
"epoch": 0.4739898677869764, + "grad_norm": 0.9971938324913802, + "learning_rate": 1.8156874946169414e-05, + "loss": 0.3328183889389038, + "step": 959 + }, + { + "epoch": 0.4744841220808106, + "grad_norm": 1.1071894513842127, + "learning_rate": 1.815214559536069e-05, + "loss": 0.3715244233608246, + "step": 960 + }, + { + "epoch": 0.47497837637464474, + "grad_norm": 0.9615506144211561, + "learning_rate": 1.814741080229605e-05, + "loss": 0.31065690517425537, + "step": 961 + }, + { + "epoch": 0.47547263066847895, + "grad_norm": 1.0443475280559777, + "learning_rate": 1.814267057013637e-05, + "loss": 0.3632475733757019, + "step": 962 + }, + { + "epoch": 0.4759668849623131, + "grad_norm": 1.0447314581931118, + "learning_rate": 1.813792490204616e-05, + "loss": 0.3367992043495178, + "step": 963 + }, + { + "epoch": 0.4764611392561473, + "grad_norm": 3.0902704784337263, + "learning_rate": 1.813317380119356e-05, + "loss": 0.37678295373916626, + "step": 964 + }, + { + "epoch": 0.47695539354998145, + "grad_norm": 1.092515860835368, + "learning_rate": 1.8128417270750342e-05, + "loss": 0.31454166769981384, + "step": 965 + }, + { + "epoch": 0.47744964784381566, + "grad_norm": 1.1351912635055343, + "learning_rate": 1.81236553138919e-05, + "loss": 0.38495004177093506, + "step": 966 + }, + { + "epoch": 0.4779439021376498, + "grad_norm": 1.1935841314497264, + "learning_rate": 1.8118887933797237e-05, + "loss": 0.3867315948009491, + "step": 967 + }, + { + "epoch": 0.478438156431484, + "grad_norm": 1.0520609240642282, + "learning_rate": 1.8114115133648996e-05, + "loss": 0.3453156650066376, + "step": 968 + }, + { + "epoch": 0.47893241072531817, + "grad_norm": 1.0244115852831113, + "learning_rate": 1.8109336916633426e-05, + "loss": 0.34461456537246704, + "step": 969 + }, + { + "epoch": 0.4794266650191524, + "grad_norm": 1.0814329785787762, + "learning_rate": 1.8104553285940404e-05, + "loss": 0.36489856243133545, + "step": 970 + }, + { + "epoch": 0.4799209193129865, + "grad_norm": 
1.0551232871498393, + "learning_rate": 1.80997642447634e-05, + "loss": 0.3596840500831604, + "step": 971 + }, + { + "epoch": 0.48041517360682073, + "grad_norm": 1.1473167291229827, + "learning_rate": 1.8094969796299527e-05, + "loss": 0.3856956362724304, + "step": 972 + }, + { + "epoch": 0.4809094279006549, + "grad_norm": 1.036679746340059, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3235170245170593, + "step": 973 + }, + { + "epoch": 0.4814036821944891, + "grad_norm": 0.9980037023378185, + "learning_rate": 1.8085364690317564e-05, + "loss": 0.28033584356307983, + "step": 974 + }, + { + "epoch": 0.48189793648832324, + "grad_norm": 1.0350899218465197, + "learning_rate": 1.808055403921171e-05, + "loss": 0.3279935419559479, + "step": 975 + }, + { + "epoch": 0.4823921907821574, + "grad_norm": 1.1400322966724836, + "learning_rate": 1.8075737993643442e-05, + "loss": 0.36426058411598206, + "step": 976 + }, + { + "epoch": 0.4828864450759916, + "grad_norm": 1.062964412180167, + "learning_rate": 1.8070916556827876e-05, + "loss": 0.3720256984233856, + "step": 977 + }, + { + "epoch": 0.48338069936982575, + "grad_norm": 1.1101144076762623, + "learning_rate": 1.8066089731983735e-05, + "loss": 0.3299727439880371, + "step": 978 + }, + { + "epoch": 0.48387495366365996, + "grad_norm": 1.1080862284860111, + "learning_rate": 1.8061257522333338e-05, + "loss": 0.3425888419151306, + "step": 979 + }, + { + "epoch": 0.4843692079574941, + "grad_norm": 1.1899160965861721, + "learning_rate": 1.80564199311026e-05, + "loss": 0.34109392762184143, + "step": 980 + }, + { + "epoch": 0.4848634622513283, + "grad_norm": 1.017538963669655, + "learning_rate": 1.805157696152103e-05, + "loss": 0.29130926728248596, + "step": 981 + }, + { + "epoch": 0.48535771654516247, + "grad_norm": 1.1092378859222098, + "learning_rate": 1.8046728616821726e-05, + "loss": 0.36200815439224243, + "step": 982 + }, + { + "epoch": 0.48585197083899667, + "grad_norm": 1.3150178990962822, + "learning_rate": 
1.8041874900241368e-05, + "loss": 0.3343828320503235, + "step": 983 + }, + { + "epoch": 0.4863462251328308, + "grad_norm": 0.9882024578614582, + "learning_rate": 1.803701581502023e-05, + "loss": 0.32942160964012146, + "step": 984 + }, + { + "epoch": 0.48684047942666503, + "grad_norm": 0.9909863431121513, + "learning_rate": 1.803215136440217e-05, + "loss": 0.34390491247177124, + "step": 985 + }, + { + "epoch": 0.4873347337204992, + "grad_norm": 1.1118778887065912, + "learning_rate": 1.8027281551634622e-05, + "loss": 0.37723374366760254, + "step": 986 + }, + { + "epoch": 0.4878289880143334, + "grad_norm": 1.0469525424396737, + "learning_rate": 1.802240637996861e-05, + "loss": 0.3493693470954895, + "step": 987 + }, + { + "epoch": 0.48832324230816754, + "grad_norm": 1.131021341780466, + "learning_rate": 1.8017525852658723e-05, + "loss": 0.3564317524433136, + "step": 988 + }, + { + "epoch": 0.48881749660200174, + "grad_norm": 1.0435634175515676, + "learning_rate": 1.8012639972963136e-05, + "loss": 0.36572349071502686, + "step": 989 + }, + { + "epoch": 0.4893117508958359, + "grad_norm": 1.0078714155476896, + "learning_rate": 1.8007748744143586e-05, + "loss": 0.31457674503326416, + "step": 990 + }, + { + "epoch": 0.4898060051896701, + "grad_norm": 1.1126722971991523, + "learning_rate": 1.8002852169465393e-05, + "loss": 0.36191096901893616, + "step": 991 + }, + { + "epoch": 0.49030025948350425, + "grad_norm": 1.0321930748215848, + "learning_rate": 1.799795025219744e-05, + "loss": 0.33284491300582886, + "step": 992 + }, + { + "epoch": 0.49079451377733846, + "grad_norm": 1.0239281284644144, + "learning_rate": 1.7993042995612172e-05, + "loss": 0.3101437985897064, + "step": 993 + }, + { + "epoch": 0.4912887680711726, + "grad_norm": 1.033291904553078, + "learning_rate": 1.7988130402985608e-05, + "loss": 0.3196948170661926, + "step": 994 + }, + { + "epoch": 0.4917830223650068, + "grad_norm": 1.1489266069218314, + "learning_rate": 1.7983212477597325e-05, + "loss": 
0.3757585883140564, + "step": 995 + }, + { + "epoch": 0.49227727665884097, + "grad_norm": 1.1725728838471274, + "learning_rate": 1.7978289222730454e-05, + "loss": 0.3949659466743469, + "step": 996 + }, + { + "epoch": 0.4927715309526752, + "grad_norm": 1.1279800728609437, + "learning_rate": 1.79733606416717e-05, + "loss": 0.3490184545516968, + "step": 997 + }, + { + "epoch": 0.4932657852465093, + "grad_norm": 1.2158784468170585, + "learning_rate": 1.7968426737711304e-05, + "loss": 0.32302743196487427, + "step": 998 + }, + { + "epoch": 0.49376003954034353, + "grad_norm": 1.1923748239050125, + "learning_rate": 1.7963487514143073e-05, + "loss": 0.4205089807510376, + "step": 999 + }, + { + "epoch": 0.4942542938341777, + "grad_norm": 1.0997609009048648, + "learning_rate": 1.7958542974264363e-05, + "loss": 0.30787885189056396, + "step": 1000 + }, + { + "epoch": 0.4947485481280119, + "grad_norm": 0.9527130505595168, + "learning_rate": 1.7953593121376075e-05, + "loss": 0.3174916207790375, + "step": 1001 + }, + { + "epoch": 0.49524280242184604, + "grad_norm": 0.9736659707101099, + "learning_rate": 1.7948637958782662e-05, + "loss": 0.330039381980896, + "step": 1002 + }, + { + "epoch": 0.4957370567156802, + "grad_norm": 1.0487288206783625, + "learning_rate": 1.794367748979212e-05, + "loss": 0.3362613320350647, + "step": 1003 + }, + { + "epoch": 0.4962313110095144, + "grad_norm": 1.065682818958373, + "learning_rate": 1.793871171771599e-05, + "loss": 0.3479865789413452, + "step": 1004 + }, + { + "epoch": 0.49672556530334855, + "grad_norm": 1.0920057715386207, + "learning_rate": 1.7933740645869345e-05, + "loss": 0.361303448677063, + "step": 1005 + }, + { + "epoch": 0.49721981959718276, + "grad_norm": 1.07605927747069, + "learning_rate": 1.79287642775708e-05, + "loss": 0.32340794801712036, + "step": 1006 + }, + { + "epoch": 0.4977140738910169, + "grad_norm": 1.086462795838887, + "learning_rate": 1.792378261614252e-05, + "loss": 0.3410148620605469, + "step": 1007 + }, + { + 
"epoch": 0.4982083281848511, + "grad_norm": 1.0450045575623719, + "learning_rate": 1.791879566491018e-05, + "loss": 0.3332127034664154, + "step": 1008 + }, + { + "epoch": 0.49870258247868526, + "grad_norm": 1.1673390171795246, + "learning_rate": 1.7913803427202998e-05, + "loss": 0.36532774567604065, + "step": 1009 + }, + { + "epoch": 0.49919683677251947, + "grad_norm": 1.1838892890378474, + "learning_rate": 1.7908805906353725e-05, + "loss": 0.3721959888935089, + "step": 1010 + }, + { + "epoch": 0.4996910910663536, + "grad_norm": 0.990806411218012, + "learning_rate": 1.7903803105698627e-05, + "loss": 0.3406672477722168, + "step": 1011 + }, + { + "epoch": 0.5001853453601878, + "grad_norm": 1.0152890264941994, + "learning_rate": 1.789879502857751e-05, + "loss": 0.323926717042923, + "step": 1012 + }, + { + "epoch": 0.500679599654022, + "grad_norm": 1.082078334287421, + "learning_rate": 1.7893781678333694e-05, + "loss": 0.36245018243789673, + "step": 1013 + }, + { + "epoch": 0.5011738539478562, + "grad_norm": 1.1363612319173766, + "learning_rate": 1.7888763058314016e-05, + "loss": 0.36145877838134766, + "step": 1014 + }, + { + "epoch": 0.5016681082416904, + "grad_norm": 0.9479821815236287, + "learning_rate": 1.788373917186884e-05, + "loss": 0.31398001313209534, + "step": 1015 + }, + { + "epoch": 0.5021623625355245, + "grad_norm": 1.0634976007398544, + "learning_rate": 1.7878710022352033e-05, + "loss": 0.36732447147369385, + "step": 1016 + }, + { + "epoch": 0.5026566168293587, + "grad_norm": 1.0888289854290114, + "learning_rate": 1.787367561312099e-05, + "loss": 0.3336929678916931, + "step": 1017 + }, + { + "epoch": 0.5031508711231929, + "grad_norm": 1.081948070644993, + "learning_rate": 1.786863594753661e-05, + "loss": 0.33306068181991577, + "step": 1018 + }, + { + "epoch": 0.5036451254170271, + "grad_norm": 1.1710814753085148, + "learning_rate": 1.7863591028963297e-05, + "loss": 0.32577213644981384, + "step": 1019 + }, + { + "epoch": 0.5041393797108612, + "grad_norm": 
1.0902819718302648, + "learning_rate": 1.7858540860768974e-05, + "loss": 0.33542972803115845, + "step": 1020 + }, + { + "epoch": 0.5046336340046954, + "grad_norm": 1.1116685663765398, + "learning_rate": 1.7853485446325055e-05, + "loss": 0.3075249195098877, + "step": 1021 + }, + { + "epoch": 0.5051278882985296, + "grad_norm": 1.135601263046101, + "learning_rate": 1.7848424789006466e-05, + "loss": 0.3473510146141052, + "step": 1022 + }, + { + "epoch": 0.5056221425923638, + "grad_norm": 1.2152682076096186, + "learning_rate": 1.784335889219163e-05, + "loss": 0.3543929159641266, + "step": 1023 + }, + { + "epoch": 0.5061163968861979, + "grad_norm": 1.026549045591816, + "learning_rate": 1.783828775926246e-05, + "loss": 0.3198593556880951, + "step": 1024 + }, + { + "epoch": 0.5066106511800321, + "grad_norm": 1.07796975394457, + "learning_rate": 1.783321139360438e-05, + "loss": 0.34223973751068115, + "step": 1025 + }, + { + "epoch": 0.5071049054738663, + "grad_norm": 1.2487195797385122, + "learning_rate": 1.78281297986063e-05, + "loss": 0.3895387351512909, + "step": 1026 + }, + { + "epoch": 0.5075991597677005, + "grad_norm": 1.0333211037977794, + "learning_rate": 1.782304297766061e-05, + "loss": 0.35764580965042114, + "step": 1027 + }, + { + "epoch": 0.5080934140615346, + "grad_norm": 0.9679048017438919, + "learning_rate": 1.7817950934163213e-05, + "loss": 0.30859488248825073, + "step": 1028 + }, + { + "epoch": 0.5085876683553688, + "grad_norm": 1.0913185130679384, + "learning_rate": 1.7812853671513472e-05, + "loss": 0.3554389476776123, + "step": 1029 + }, + { + "epoch": 0.509081922649203, + "grad_norm": 1.0101463789736986, + "learning_rate": 1.7807751193114254e-05, + "loss": 0.3528766632080078, + "step": 1030 + }, + { + "epoch": 0.5095761769430371, + "grad_norm": 1.054067237260528, + "learning_rate": 1.78026435023719e-05, + "loss": 0.3645275831222534, + "step": 1031 + }, + { + "epoch": 0.5100704312368713, + "grad_norm": 1.338540047449502, + "learning_rate": 
1.779753060269623e-05, + "loss": 0.3137075901031494, + "step": 1032 + }, + { + "epoch": 0.5105646855307056, + "grad_norm": 1.0928434325752037, + "learning_rate": 1.7792412497500538e-05, + "loss": 0.31993091106414795, + "step": 1033 + }, + { + "epoch": 0.5110589398245398, + "grad_norm": 1.032718640643118, + "learning_rate": 1.7787289190201606e-05, + "loss": 0.3514295220375061, + "step": 1034 + }, + { + "epoch": 0.5115531941183739, + "grad_norm": 0.9529992201270954, + "learning_rate": 1.7782160684219677e-05, + "loss": 0.3167670667171478, + "step": 1035 + }, + { + "epoch": 0.5120474484122081, + "grad_norm": 1.1056391999630892, + "learning_rate": 1.7777026982978473e-05, + "loss": 0.3298097252845764, + "step": 1036 + }, + { + "epoch": 0.5125417027060423, + "grad_norm": 1.008539858185866, + "learning_rate": 1.777188808990517e-05, + "loss": 0.3334948420524597, + "step": 1037 + }, + { + "epoch": 0.5130359569998765, + "grad_norm": 1.1451382861648118, + "learning_rate": 1.776674400843043e-05, + "loss": 0.3705115020275116, + "step": 1038 + }, + { + "epoch": 0.5135302112937106, + "grad_norm": 1.2062150323771585, + "learning_rate": 1.7761594741988356e-05, + "loss": 0.3586978614330292, + "step": 1039 + }, + { + "epoch": 0.5140244655875448, + "grad_norm": 0.9949081741462515, + "learning_rate": 1.7756440294016535e-05, + "loss": 0.3105466663837433, + "step": 1040 + }, + { + "epoch": 0.514518719881379, + "grad_norm": 1.240576049327348, + "learning_rate": 1.7751280667956002e-05, + "loss": 0.35213470458984375, + "step": 1041 + }, + { + "epoch": 0.5150129741752132, + "grad_norm": 1.1494264660428748, + "learning_rate": 1.7746115867251245e-05, + "loss": 0.3830525875091553, + "step": 1042 + }, + { + "epoch": 0.5155072284690473, + "grad_norm": 1.044917786849415, + "learning_rate": 1.7740945895350215e-05, + "loss": 0.34106165170669556, + "step": 1043 + }, + { + "epoch": 0.5160014827628815, + "grad_norm": 0.9456529066854209, + "learning_rate": 1.773577075570431e-05, + "loss": 
0.33408549427986145, + "step": 1044 + }, + { + "epoch": 0.5164957370567157, + "grad_norm": 1.057634132461443, + "learning_rate": 1.7730590451768375e-05, + "loss": 0.32823115587234497, + "step": 1045 + }, + { + "epoch": 0.5169899913505499, + "grad_norm": 0.9870247990943719, + "learning_rate": 1.7725404987000716e-05, + "loss": 0.2866591811180115, + "step": 1046 + }, + { + "epoch": 0.517484245644384, + "grad_norm": 1.0669638645996897, + "learning_rate": 1.772021436486307e-05, + "loss": 0.34053099155426025, + "step": 1047 + }, + { + "epoch": 0.5179784999382182, + "grad_norm": 1.0384310943814752, + "learning_rate": 1.771501858882062e-05, + "loss": 0.30379486083984375, + "step": 1048 + }, + { + "epoch": 0.5184727542320524, + "grad_norm": 1.299899967945095, + "learning_rate": 1.7709817662341998e-05, + "loss": 0.37569302320480347, + "step": 1049 + }, + { + "epoch": 0.5189670085258866, + "grad_norm": 1.0489606422309163, + "learning_rate": 1.770461158889926e-05, + "loss": 0.31770390272140503, + "step": 1050 + }, + { + "epoch": 0.5194612628197207, + "grad_norm": 1.1640089464310481, + "learning_rate": 1.769940037196791e-05, + "loss": 0.34175002574920654, + "step": 1051 + }, + { + "epoch": 0.5199555171135549, + "grad_norm": 1.0797819699416114, + "learning_rate": 1.769418401502689e-05, + "loss": 0.3634580671787262, + "step": 1052 + }, + { + "epoch": 0.5204497714073891, + "grad_norm": 1.1990448584577926, + "learning_rate": 1.7688962521558554e-05, + "loss": 0.3631044030189514, + "step": 1053 + }, + { + "epoch": 0.5209440257012233, + "grad_norm": 1.2482048374766477, + "learning_rate": 1.7683735895048698e-05, + "loss": 0.3402160704135895, + "step": 1054 + }, + { + "epoch": 0.5214382799950574, + "grad_norm": 1.2190765212037056, + "learning_rate": 1.7678504138986548e-05, + "loss": 0.3895665407180786, + "step": 1055 + }, + { + "epoch": 0.5219325342888916, + "grad_norm": 1.076846194861831, + "learning_rate": 1.767326725686475e-05, + "loss": 0.32207030057907104, + "step": 1056 + }, + { + 
"epoch": 0.5224267885827258, + "grad_norm": 1.10282378456951, + "learning_rate": 1.7668025252179363e-05, + "loss": 0.33095866441726685, + "step": 1057 + }, + { + "epoch": 0.5229210428765599, + "grad_norm": 1.1487800022178571, + "learning_rate": 1.7662778128429883e-05, + "loss": 0.33239442110061646, + "step": 1058 + }, + { + "epoch": 0.5234152971703941, + "grad_norm": 0.9873637767970463, + "learning_rate": 1.7657525889119212e-05, + "loss": 0.27432021498680115, + "step": 1059 + }, + { + "epoch": 0.5239095514642284, + "grad_norm": 1.0928994862368866, + "learning_rate": 1.7652268537753672e-05, + "loss": 0.3221333622932434, + "step": 1060 + }, + { + "epoch": 0.5244038057580626, + "grad_norm": 1.114838100134283, + "learning_rate": 1.764700607784299e-05, + "loss": 0.3126341700553894, + "step": 1061 + }, + { + "epoch": 0.5248980600518967, + "grad_norm": 1.0401864286303986, + "learning_rate": 1.7641738512900315e-05, + "loss": 0.33239883184432983, + "step": 1062 + }, + { + "epoch": 0.5253923143457309, + "grad_norm": 0.9509614150111031, + "learning_rate": 1.7636465846442197e-05, + "loss": 0.30075010657310486, + "step": 1063 + }, + { + "epoch": 0.5258865686395651, + "grad_norm": 1.0717488761603333, + "learning_rate": 1.763118808198859e-05, + "loss": 0.3577713370323181, + "step": 1064 + }, + { + "epoch": 0.5263808229333993, + "grad_norm": 1.0802706273753335, + "learning_rate": 1.7625905223062858e-05, + "loss": 0.3483964204788208, + "step": 1065 + }, + { + "epoch": 0.5268750772272334, + "grad_norm": 1.1651963376515642, + "learning_rate": 1.762061727319176e-05, + "loss": 0.3622454106807709, + "step": 1066 + }, + { + "epoch": 0.5273693315210676, + "grad_norm": 1.0440643033385941, + "learning_rate": 1.761532423590545e-05, + "loss": 0.35156917572021484, + "step": 1067 + }, + { + "epoch": 0.5278635858149018, + "grad_norm": 1.1589394381083906, + "learning_rate": 1.7610026114737498e-05, + "loss": 0.3413820266723633, + "step": 1068 + }, + { + "epoch": 0.528357840108736, + "grad_norm": 
1.1280561588615983, + "learning_rate": 1.760472291322484e-05, + "loss": 0.3707934021949768, + "step": 1069 + }, + { + "epoch": 0.5288520944025701, + "grad_norm": 1.2170503232061094, + "learning_rate": 1.7599414634907828e-05, + "loss": 0.3472951054573059, + "step": 1070 + }, + { + "epoch": 0.5293463486964043, + "grad_norm": 1.1676650140216285, + "learning_rate": 1.7594101283330184e-05, + "loss": 0.393882155418396, + "step": 1071 + }, + { + "epoch": 0.5298406029902385, + "grad_norm": 0.9683606994511744, + "learning_rate": 1.758878286203903e-05, + "loss": 0.3094913065433502, + "step": 1072 + }, + { + "epoch": 0.5303348572840727, + "grad_norm": 1.09347684867524, + "learning_rate": 1.758345937458487e-05, + "loss": 0.33904048800468445, + "step": 1073 + }, + { + "epoch": 0.5308291115779068, + "grad_norm": 1.0218184375103434, + "learning_rate": 1.7578130824521585e-05, + "loss": 0.3218901753425598, + "step": 1074 + }, + { + "epoch": 0.531323365871741, + "grad_norm": 0.95615697696865, + "learning_rate": 1.7572797215406442e-05, + "loss": 0.31584852933883667, + "step": 1075 + }, + { + "epoch": 0.5318176201655752, + "grad_norm": 0.9682503945021611, + "learning_rate": 1.756745855080008e-05, + "loss": 0.3449877202510834, + "step": 1076 + }, + { + "epoch": 0.5323118744594094, + "grad_norm": 1.084607183777355, + "learning_rate": 1.756211483426651e-05, + "loss": 0.3544886112213135, + "step": 1077 + }, + { + "epoch": 0.5328061287532435, + "grad_norm": 1.1680618553038933, + "learning_rate": 1.755676606937313e-05, + "loss": 0.34360697865486145, + "step": 1078 + }, + { + "epoch": 0.5333003830470777, + "grad_norm": 1.0514045755368502, + "learning_rate": 1.7551412259690695e-05, + "loss": 0.3214710056781769, + "step": 1079 + }, + { + "epoch": 0.5337946373409119, + "grad_norm": 0.9951048830690797, + "learning_rate": 1.754605340879333e-05, + "loss": 0.33841896057128906, + "step": 1080 + }, + { + "epoch": 0.534288891634746, + "grad_norm": 1.0536673015942455, + "learning_rate": 
1.7540689520258532e-05, + "loss": 0.3134745657444, + "step": 1081 + }, + { + "epoch": 0.5347831459285802, + "grad_norm": 1.1773503335041235, + "learning_rate": 1.753532059766715e-05, + "loss": 0.3469204306602478, + "step": 1082 + }, + { + "epoch": 0.5352774002224144, + "grad_norm": 1.3802140663046265, + "learning_rate": 1.752994664460341e-05, + "loss": 0.39217621088027954, + "step": 1083 + }, + { + "epoch": 0.5357716545162486, + "grad_norm": 1.148906185686213, + "learning_rate": 1.7524567664654873e-05, + "loss": 0.34482622146606445, + "step": 1084 + }, + { + "epoch": 0.5362659088100827, + "grad_norm": 1.0089175831530743, + "learning_rate": 1.751918366141248e-05, + "loss": 0.308369517326355, + "step": 1085 + }, + { + "epoch": 0.5367601631039169, + "grad_norm": 1.1441511379564429, + "learning_rate": 1.751379463847051e-05, + "loss": 0.3396676480770111, + "step": 1086 + }, + { + "epoch": 0.5372544173977511, + "grad_norm": 1.0963418237920814, + "learning_rate": 1.7508400599426596e-05, + "loss": 0.3059370517730713, + "step": 1087 + }, + { + "epoch": 0.5377486716915854, + "grad_norm": 0.993693807257297, + "learning_rate": 1.7503001547881728e-05, + "loss": 0.31689077615737915, + "step": 1088 + }, + { + "epoch": 0.5382429259854195, + "grad_norm": 1.2996366258679217, + "learning_rate": 1.749759748744023e-05, + "loss": 0.37134337425231934, + "step": 1089 + }, + { + "epoch": 0.5387371802792537, + "grad_norm": 1.0586799377490923, + "learning_rate": 1.7492188421709775e-05, + "loss": 0.30404967069625854, + "step": 1090 + }, + { + "epoch": 0.5392314345730879, + "grad_norm": 1.1213884593031693, + "learning_rate": 1.7486774354301382e-05, + "loss": 0.34773269295692444, + "step": 1091 + }, + { + "epoch": 0.5397256888669221, + "grad_norm": 1.135256212480744, + "learning_rate": 1.7481355288829404e-05, + "loss": 0.34448760747909546, + "step": 1092 + }, + { + "epoch": 0.5402199431607562, + "grad_norm": 1.1111138178806874, + "learning_rate": 1.7475931228911526e-05, + "loss": 
0.33557915687561035, + "step": 1093 + }, + { + "epoch": 0.5407141974545904, + "grad_norm": 1.1277612406863344, + "learning_rate": 1.7470502178168783e-05, + "loss": 0.3216322362422943, + "step": 1094 + }, + { + "epoch": 0.5412084517484246, + "grad_norm": 1.1416777218141756, + "learning_rate": 1.7465068140225524e-05, + "loss": 0.3175346255302429, + "step": 1095 + }, + { + "epoch": 0.5417027060422588, + "grad_norm": 1.0466005920407673, + "learning_rate": 1.7459629118709435e-05, + "loss": 0.3150678277015686, + "step": 1096 + }, + { + "epoch": 0.5421969603360929, + "grad_norm": 1.1080261557130098, + "learning_rate": 1.7454185117251534e-05, + "loss": 0.3372325897216797, + "step": 1097 + }, + { + "epoch": 0.5426912146299271, + "grad_norm": 1.1607395393986693, + "learning_rate": 1.7448736139486156e-05, + "loss": 0.3460095524787903, + "step": 1098 + }, + { + "epoch": 0.5431854689237613, + "grad_norm": 1.0960477562857334, + "learning_rate": 1.7443282189050964e-05, + "loss": 0.3465900421142578, + "step": 1099 + }, + { + "epoch": 0.5436797232175955, + "grad_norm": 1.1271957826518202, + "learning_rate": 1.7437823269586925e-05, + "loss": 0.3707941174507141, + "step": 1100 + }, + { + "epoch": 0.5441739775114296, + "grad_norm": 1.0732325510644303, + "learning_rate": 1.7432359384738354e-05, + "loss": 0.3317713141441345, + "step": 1101 + }, + { + "epoch": 0.5446682318052638, + "grad_norm": 1.10075448775578, + "learning_rate": 1.742689053815285e-05, + "loss": 0.3391956090927124, + "step": 1102 + }, + { + "epoch": 0.545162486099098, + "grad_norm": 1.483156522178114, + "learning_rate": 1.742141673348134e-05, + "loss": 0.3838513195514679, + "step": 1103 + }, + { + "epoch": 0.5456567403929322, + "grad_norm": 1.2368776155357775, + "learning_rate": 1.7415937974378057e-05, + "loss": 0.4438849687576294, + "step": 1104 + }, + { + "epoch": 0.5461509946867663, + "grad_norm": 1.1360365035496875, + "learning_rate": 1.7410454264500542e-05, + "loss": 0.35329896211624146, + "step": 1105 + }, + { + 
"epoch": 0.5466452489806005, + "grad_norm": 0.9946710480219276, + "learning_rate": 1.7404965607509646e-05, + "loss": 0.3124481439590454, + "step": 1106 + }, + { + "epoch": 0.5471395032744347, + "grad_norm": 1.1827285369169889, + "learning_rate": 1.739947200706951e-05, + "loss": 0.3595995008945465, + "step": 1107 + }, + { + "epoch": 0.5476337575682688, + "grad_norm": 1.0771205850736374, + "learning_rate": 1.7393973466847592e-05, + "loss": 0.35914891958236694, + "step": 1108 + }, + { + "epoch": 0.548128011862103, + "grad_norm": 1.0372075645038734, + "learning_rate": 1.7388469990514636e-05, + "loss": 0.34034737944602966, + "step": 1109 + }, + { + "epoch": 0.5486222661559372, + "grad_norm": 0.9639792162761298, + "learning_rate": 1.7382961581744677e-05, + "loss": 0.3033643066883087, + "step": 1110 + }, + { + "epoch": 0.5491165204497714, + "grad_norm": 1.0333536833038373, + "learning_rate": 1.737744824421506e-05, + "loss": 0.3239862322807312, + "step": 1111 + }, + { + "epoch": 0.5496107747436055, + "grad_norm": 1.0992782883377998, + "learning_rate": 1.7371929981606403e-05, + "loss": 0.36473411321640015, + "step": 1112 + }, + { + "epoch": 0.5501050290374397, + "grad_norm": 0.9808971248907185, + "learning_rate": 1.7366406797602625e-05, + "loss": 0.3129761517047882, + "step": 1113 + }, + { + "epoch": 0.550599283331274, + "grad_norm": 1.0031500416462213, + "learning_rate": 1.736087869589092e-05, + "loss": 0.30224812030792236, + "step": 1114 + }, + { + "epoch": 0.5510935376251082, + "grad_norm": 1.0008522519559948, + "learning_rate": 1.7355345680161774e-05, + "loss": 0.30045247077941895, + "step": 1115 + }, + { + "epoch": 0.5515877919189422, + "grad_norm": 1.1079372723945795, + "learning_rate": 1.7349807754108944e-05, + "loss": 0.3356926739215851, + "step": 1116 + }, + { + "epoch": 0.5520820462127765, + "grad_norm": 1.3704982317685879, + "learning_rate": 1.7344264921429475e-05, + "loss": 0.37749868631362915, + "step": 1117 + }, + { + "epoch": 0.5525763005066107, + 
"grad_norm": 1.0400914273370205, + "learning_rate": 1.733871718582368e-05, + "loss": 0.331012099981308, + "step": 1118 + }, + { + "epoch": 0.5530705548004449, + "grad_norm": 1.2654046748606915, + "learning_rate": 1.7333164550995153e-05, + "loss": 0.3557187020778656, + "step": 1119 + }, + { + "epoch": 0.553564809094279, + "grad_norm": 1.151377810019934, + "learning_rate": 1.7327607020650744e-05, + "loss": 0.34102991223335266, + "step": 1120 + }, + { + "epoch": 0.5540590633881132, + "grad_norm": 1.0397881413898085, + "learning_rate": 1.7322044598500594e-05, + "loss": 0.328019917011261, + "step": 1121 + }, + { + "epoch": 0.5545533176819474, + "grad_norm": 1.0773058589187376, + "learning_rate": 1.7316477288258085e-05, + "loss": 0.33980751037597656, + "step": 1122 + }, + { + "epoch": 0.5550475719757816, + "grad_norm": 1.1823119583137516, + "learning_rate": 1.731090509363988e-05, + "loss": 0.3460109233856201, + "step": 1123 + }, + { + "epoch": 0.5555418262696157, + "grad_norm": 1.0727245460190564, + "learning_rate": 1.730532801836589e-05, + "loss": 0.3013002276420593, + "step": 1124 + }, + { + "epoch": 0.5560360805634499, + "grad_norm": 1.191952525403325, + "learning_rate": 1.72997460661593e-05, + "loss": 0.36195772886276245, + "step": 1125 + }, + { + "epoch": 0.5565303348572841, + "grad_norm": 1.1481571926267522, + "learning_rate": 1.7294159240746532e-05, + "loss": 0.3368675112724304, + "step": 1126 + }, + { + "epoch": 0.5570245891511183, + "grad_norm": 1.0950064938478345, + "learning_rate": 1.7288567545857283e-05, + "loss": 0.36618539690971375, + "step": 1127 + }, + { + "epoch": 0.5575188434449524, + "grad_norm": 1.0773610015009678, + "learning_rate": 1.7282970985224477e-05, + "loss": 0.3230215311050415, + "step": 1128 + }, + { + "epoch": 0.5580130977387866, + "grad_norm": 1.1539889538468413, + "learning_rate": 1.72773695625843e-05, + "loss": 0.38779711723327637, + "step": 1129 + }, + { + "epoch": 0.5585073520326208, + "grad_norm": 1.0853438524765577, + 
"learning_rate": 1.7271763281676187e-05, + "loss": 0.33910998702049255, + "step": 1130 + }, + { + "epoch": 0.559001606326455, + "grad_norm": 1.1265909455665821, + "learning_rate": 1.726615214624281e-05, + "loss": 0.3526651859283447, + "step": 1131 + }, + { + "epoch": 0.5594958606202891, + "grad_norm": 1.0899084132349224, + "learning_rate": 1.7260536160030077e-05, + "loss": 0.33794116973876953, + "step": 1132 + }, + { + "epoch": 0.5599901149141233, + "grad_norm": 1.2383181058563666, + "learning_rate": 1.7254915326787145e-05, + "loss": 0.3294123411178589, + "step": 1133 + }, + { + "epoch": 0.5604843692079575, + "grad_norm": 1.0381296685245769, + "learning_rate": 1.7249289650266402e-05, + "loss": 0.31193166971206665, + "step": 1134 + }, + { + "epoch": 0.5609786235017916, + "grad_norm": 1.0273514183990056, + "learning_rate": 1.7243659134223467e-05, + "loss": 0.298290491104126, + "step": 1135 + }, + { + "epoch": 0.5614728777956258, + "grad_norm": 1.0372406743131939, + "learning_rate": 1.7238023782417194e-05, + "loss": 0.3157176971435547, + "step": 1136 + }, + { + "epoch": 0.56196713208946, + "grad_norm": 0.9703670449018593, + "learning_rate": 1.7232383598609664e-05, + "loss": 0.3152535855770111, + "step": 1137 + }, + { + "epoch": 0.5624613863832942, + "grad_norm": 1.1457741905911056, + "learning_rate": 1.722673858656618e-05, + "loss": 0.35004952549934387, + "step": 1138 + }, + { + "epoch": 0.5629556406771283, + "grad_norm": 1.2128755723830003, + "learning_rate": 1.722108875005527e-05, + "loss": 0.3531174957752228, + "step": 1139 + }, + { + "epoch": 0.5634498949709625, + "grad_norm": 0.9896343114056704, + "learning_rate": 1.7215434092848693e-05, + "loss": 0.32532358169555664, + "step": 1140 + }, + { + "epoch": 0.5639441492647967, + "grad_norm": 1.086973420033045, + "learning_rate": 1.7209774618721408e-05, + "loss": 0.3252495229244232, + "step": 1141 + }, + { + "epoch": 0.564438403558631, + "grad_norm": 1.1232225314649664, + "learning_rate": 1.7204110331451603e-05, + 
"loss": 0.35428208112716675, + "step": 1142 + }, + { + "epoch": 0.564932657852465, + "grad_norm": 1.165276028587328, + "learning_rate": 1.7198441234820674e-05, + "loss": 0.37419646978378296, + "step": 1143 + }, + { + "epoch": 0.5654269121462993, + "grad_norm": 1.1206339776354848, + "learning_rate": 1.7192767332613235e-05, + "loss": 0.3342249095439911, + "step": 1144 + }, + { + "epoch": 0.5659211664401335, + "grad_norm": 1.0700889667237288, + "learning_rate": 1.7187088628617093e-05, + "loss": 0.36827898025512695, + "step": 1145 + }, + { + "epoch": 0.5664154207339677, + "grad_norm": 1.1884715403984119, + "learning_rate": 1.7181405126623275e-05, + "loss": 0.3560858964920044, + "step": 1146 + }, + { + "epoch": 0.5669096750278018, + "grad_norm": 1.0578073497156413, + "learning_rate": 1.7175716830426005e-05, + "loss": 0.35333797335624695, + "step": 1147 + }, + { + "epoch": 0.567403929321636, + "grad_norm": 1.0504095801617317, + "learning_rate": 1.71700237438227e-05, + "loss": 0.31053799390792847, + "step": 1148 + }, + { + "epoch": 0.5678981836154702, + "grad_norm": 1.1443484208273471, + "learning_rate": 1.7164325870613998e-05, + "loss": 0.37123826146125793, + "step": 1149 + }, + { + "epoch": 0.5683924379093044, + "grad_norm": 1.069054169156011, + "learning_rate": 1.715862321460371e-05, + "loss": 0.33981990814208984, + "step": 1150 + }, + { + "epoch": 0.5688866922031385, + "grad_norm": 1.1295222791710222, + "learning_rate": 1.7152915779598846e-05, + "loss": 0.34938257932662964, + "step": 1151 + }, + { + "epoch": 0.5693809464969727, + "grad_norm": 1.10704413276648, + "learning_rate": 1.714720356940961e-05, + "loss": 0.3069387376308441, + "step": 1152 + }, + { + "epoch": 0.5698752007908069, + "grad_norm": 1.1206304490989205, + "learning_rate": 1.7141486587849397e-05, + "loss": 0.34879156947135925, + "step": 1153 + }, + { + "epoch": 0.5703694550846411, + "grad_norm": 1.140159647567344, + "learning_rate": 1.7135764838734773e-05, + "loss": 0.3624545931816101, + "step": 1154 + 
}, + { + "epoch": 0.5708637093784752, + "grad_norm": 1.0671159168894162, + "learning_rate": 1.7130038325885502e-05, + "loss": 0.3548320531845093, + "step": 1155 + }, + { + "epoch": 0.5713579636723094, + "grad_norm": 1.0469806768045702, + "learning_rate": 1.7124307053124518e-05, + "loss": 0.3004404902458191, + "step": 1156 + }, + { + "epoch": 0.5718522179661436, + "grad_norm": 1.1058227077648823, + "learning_rate": 1.7118571024277943e-05, + "loss": 0.31545472145080566, + "step": 1157 + }, + { + "epoch": 0.5723464722599778, + "grad_norm": 1.100412587450837, + "learning_rate": 1.711283024317506e-05, + "loss": 0.3116477429866791, + "step": 1158 + }, + { + "epoch": 0.5728407265538119, + "grad_norm": 1.1169526030822408, + "learning_rate": 1.710708471364834e-05, + "loss": 0.3472268581390381, + "step": 1159 + }, + { + "epoch": 0.5733349808476461, + "grad_norm": 1.1641407854241053, + "learning_rate": 1.7101334439533414e-05, + "loss": 0.33334046602249146, + "step": 1160 + }, + { + "epoch": 0.5738292351414803, + "grad_norm": 1.1720238639752558, + "learning_rate": 1.7095579424669074e-05, + "loss": 0.3462664783000946, + "step": 1161 + }, + { + "epoch": 0.5743234894353144, + "grad_norm": 1.0854325044336006, + "learning_rate": 1.7089819672897304e-05, + "loss": 0.3241977393627167, + "step": 1162 + }, + { + "epoch": 0.5748177437291486, + "grad_norm": 1.2501733360326688, + "learning_rate": 1.7084055188063217e-05, + "loss": 0.3194134533405304, + "step": 1163 + }, + { + "epoch": 0.5753119980229828, + "grad_norm": 1.1336053472715226, + "learning_rate": 1.7078285974015103e-05, + "loss": 0.3644179701805115, + "step": 1164 + }, + { + "epoch": 0.575806252316817, + "grad_norm": 1.1434067682408584, + "learning_rate": 1.7072512034604412e-05, + "loss": 0.36653730273246765, + "step": 1165 + }, + { + "epoch": 0.5763005066106511, + "grad_norm": 1.1221051792069954, + "learning_rate": 1.706673337368574e-05, + "loss": 0.3435714840888977, + "step": 1166 + }, + { + "epoch": 0.5767947609044853, + 
"grad_norm": 1.0603782757024258, + "learning_rate": 1.706094999511684e-05, + "loss": 0.36935871839523315, + "step": 1167 + }, + { + "epoch": 0.5772890151983195, + "grad_norm": 0.9845968090919184, + "learning_rate": 1.7055161902758607e-05, + "loss": 0.29493796825408936, + "step": 1168 + }, + { + "epoch": 0.5777832694921538, + "grad_norm": 1.0115254154804856, + "learning_rate": 1.70493691004751e-05, + "loss": 0.32378828525543213, + "step": 1169 + }, + { + "epoch": 0.5782775237859878, + "grad_norm": 1.1123861652198228, + "learning_rate": 1.70435715921335e-05, + "loss": 0.3587600588798523, + "step": 1170 + }, + { + "epoch": 0.578771778079822, + "grad_norm": 1.1091481408248292, + "learning_rate": 1.703776938160415e-05, + "loss": 0.31885826587677, + "step": 1171 + }, + { + "epoch": 0.5792660323736563, + "grad_norm": 1.0414979222224348, + "learning_rate": 1.7031962472760514e-05, + "loss": 0.2950041890144348, + "step": 1172 + }, + { + "epoch": 0.5797602866674905, + "grad_norm": 1.121100234384589, + "learning_rate": 1.7026150869479208e-05, + "loss": 0.36190298199653625, + "step": 1173 + }, + { + "epoch": 0.5802545409613246, + "grad_norm": 1.067632760047313, + "learning_rate": 1.7020334575639972e-05, + "loss": 0.3402514159679413, + "step": 1174 + }, + { + "epoch": 0.5807487952551588, + "grad_norm": 0.9679286148168113, + "learning_rate": 1.7014513595125684e-05, + "loss": 0.3131282925605774, + "step": 1175 + }, + { + "epoch": 0.581243049548993, + "grad_norm": 1.056786860676952, + "learning_rate": 1.7008687931822344e-05, + "loss": 0.29499226808547974, + "step": 1176 + }, + { + "epoch": 0.5817373038428272, + "grad_norm": 1.0712930292635054, + "learning_rate": 1.700285758961908e-05, + "loss": 0.36821871995925903, + "step": 1177 + }, + { + "epoch": 0.5822315581366613, + "grad_norm": 1.2780126948070993, + "learning_rate": 1.6997022572408152e-05, + "loss": 0.31486836075782776, + "step": 1178 + }, + { + "epoch": 0.5827258124304955, + "grad_norm": 1.0778384840117066, + 
"learning_rate": 1.6991182884084928e-05, + "loss": 0.3176078498363495, + "step": 1179 + }, + { + "epoch": 0.5832200667243297, + "grad_norm": 1.294300282858588, + "learning_rate": 1.69853385285479e-05, + "loss": 0.4130980968475342, + "step": 1180 + }, + { + "epoch": 0.5837143210181639, + "grad_norm": 1.103648457674251, + "learning_rate": 1.697948950969868e-05, + "loss": 0.3164641857147217, + "step": 1181 + }, + { + "epoch": 0.584208575311998, + "grad_norm": 1.1707357674613739, + "learning_rate": 1.697363583144199e-05, + "loss": 0.36420726776123047, + "step": 1182 + }, + { + "epoch": 0.5847028296058322, + "grad_norm": 1.1827091905189109, + "learning_rate": 1.696777749768566e-05, + "loss": 0.3279833197593689, + "step": 1183 + }, + { + "epoch": 0.5851970838996664, + "grad_norm": 1.2462082843052198, + "learning_rate": 1.696191451234063e-05, + "loss": 0.311473548412323, + "step": 1184 + }, + { + "epoch": 0.5856913381935006, + "grad_norm": 1.0514702517271486, + "learning_rate": 1.6956046879320943e-05, + "loss": 0.32284629344940186, + "step": 1185 + }, + { + "epoch": 0.5861855924873347, + "grad_norm": 1.081683685343838, + "learning_rate": 1.6950174602543753e-05, + "loss": 0.3318635821342468, + "step": 1186 + }, + { + "epoch": 0.5866798467811689, + "grad_norm": 1.10655975155716, + "learning_rate": 1.6944297685929298e-05, + "loss": 0.3268307149410248, + "step": 1187 + }, + { + "epoch": 0.5871741010750031, + "grad_norm": 1.1757413336808826, + "learning_rate": 1.6938416133400934e-05, + "loss": 0.31885889172554016, + "step": 1188 + }, + { + "epoch": 0.5876683553688372, + "grad_norm": 1.044019985672413, + "learning_rate": 1.69325299488851e-05, + "loss": 0.29273971915245056, + "step": 1189 + }, + { + "epoch": 0.5881626096626714, + "grad_norm": 1.2128861059808687, + "learning_rate": 1.692663913631132e-05, + "loss": 0.3585188388824463, + "step": 1190 + }, + { + "epoch": 0.5886568639565056, + "grad_norm": 1.152183266519285, + "learning_rate": 1.6920743699612226e-05, + "loss": 
0.37145692110061646, + "step": 1191 + }, + { + "epoch": 0.5891511182503398, + "grad_norm": 1.1211663085079848, + "learning_rate": 1.691484364272352e-05, + "loss": 0.34805262088775635, + "step": 1192 + }, + { + "epoch": 0.5896453725441739, + "grad_norm": 1.1094913177494823, + "learning_rate": 1.6908938969584002e-05, + "loss": 0.3540152907371521, + "step": 1193 + }, + { + "epoch": 0.5901396268380081, + "grad_norm": 1.1138288622940957, + "learning_rate": 1.6903029684135545e-05, + "loss": 0.35808512568473816, + "step": 1194 + }, + { + "epoch": 0.5906338811318423, + "grad_norm": 1.2028693910668573, + "learning_rate": 1.68971157903231e-05, + "loss": 0.2881169021129608, + "step": 1195 + }, + { + "epoch": 0.5911281354256765, + "grad_norm": 1.126509020875868, + "learning_rate": 1.6891197292094704e-05, + "loss": 0.33551955223083496, + "step": 1196 + }, + { + "epoch": 0.5916223897195106, + "grad_norm": 1.0141998416691063, + "learning_rate": 1.688527419340146e-05, + "loss": 0.30721622705459595, + "step": 1197 + }, + { + "epoch": 0.5921166440133449, + "grad_norm": 1.0876501850612135, + "learning_rate": 1.687934649819754e-05, + "loss": 0.3296341300010681, + "step": 1198 + }, + { + "epoch": 0.5926108983071791, + "grad_norm": 1.1194456964334092, + "learning_rate": 1.6873414210440194e-05, + "loss": 0.3511606454849243, + "step": 1199 + }, + { + "epoch": 0.5931051526010133, + "grad_norm": 1.0762712673108126, + "learning_rate": 1.6867477334089728e-05, + "loss": 0.34293919801712036, + "step": 1200 + }, + { + "epoch": 0.5935994068948474, + "grad_norm": 0.9942852659141888, + "learning_rate": 1.686153587310952e-05, + "loss": 0.3334580659866333, + "step": 1201 + }, + { + "epoch": 0.5940936611886816, + "grad_norm": 1.1354238373080972, + "learning_rate": 1.6855589831466e-05, + "loss": 0.3542851209640503, + "step": 1202 + }, + { + "epoch": 0.5945879154825158, + "grad_norm": 1.0952906678959344, + "learning_rate": 1.6849639213128667e-05, + "loss": 0.30951520800590515, + "step": 1203 + }, + { + 
"epoch": 0.59508216977635, + "grad_norm": 1.0716710567299268, + "learning_rate": 1.6843684022070062e-05, + "loss": 0.333478718996048, + "step": 1204 + }, + { + "epoch": 0.5955764240701841, + "grad_norm": 1.0944556204789582, + "learning_rate": 1.683772426226579e-05, + "loss": 0.33562588691711426, + "step": 1205 + }, + { + "epoch": 0.5960706783640183, + "grad_norm": 0.9136596878493712, + "learning_rate": 1.6831759937694497e-05, + "loss": 0.2626678943634033, + "step": 1206 + }, + { + "epoch": 0.5965649326578525, + "grad_norm": 1.1138721974001247, + "learning_rate": 1.6825791052337884e-05, + "loss": 0.349543035030365, + "step": 1207 + }, + { + "epoch": 0.5970591869516867, + "grad_norm": 1.0760285856821303, + "learning_rate": 1.6819817610180696e-05, + "loss": 0.3229057788848877, + "step": 1208 + }, + { + "epoch": 0.5975534412455208, + "grad_norm": 1.0511960959262137, + "learning_rate": 1.681383961521071e-05, + "loss": 0.32023823261260986, + "step": 1209 + }, + { + "epoch": 0.598047695539355, + "grad_norm": 1.0122201188951288, + "learning_rate": 1.680785707141876e-05, + "loss": 0.31556791067123413, + "step": 1210 + }, + { + "epoch": 0.5985419498331892, + "grad_norm": 1.1858949236151264, + "learning_rate": 1.68018699827987e-05, + "loss": 0.33287158608436584, + "step": 1211 + }, + { + "epoch": 0.5990362041270234, + "grad_norm": 1.0276520854994282, + "learning_rate": 1.6795878353347427e-05, + "loss": 0.28690433502197266, + "step": 1212 + }, + { + "epoch": 0.5995304584208575, + "grad_norm": 1.1202382723881081, + "learning_rate": 1.6789882187064862e-05, + "loss": 0.3501484990119934, + "step": 1213 + }, + { + "epoch": 0.6000247127146917, + "grad_norm": 1.15016872261832, + "learning_rate": 1.678388148795397e-05, + "loss": 0.3645259439945221, + "step": 1214 + }, + { + "epoch": 0.6005189670085259, + "grad_norm": 1.0232559071014062, + "learning_rate": 1.6777876260020726e-05, + "loss": 0.3270183801651001, + "step": 1215 + }, + { + "epoch": 0.60101322130236, + "grad_norm": 
1.0680433488207848, + "learning_rate": 1.6771866507274132e-05, + "loss": 0.31767967343330383, + "step": 1216 + }, + { + "epoch": 0.6015074755961942, + "grad_norm": 1.0642272352631703, + "learning_rate": 1.6765852233726216e-05, + "loss": 0.3170120120048523, + "step": 1217 + }, + { + "epoch": 0.6020017298900284, + "grad_norm": 1.0689193394735252, + "learning_rate": 1.6759833443392022e-05, + "loss": 0.3270176351070404, + "step": 1218 + }, + { + "epoch": 0.6024959841838626, + "grad_norm": 1.0053062396233938, + "learning_rate": 1.6753810140289608e-05, + "loss": 0.3229079246520996, + "step": 1219 + }, + { + "epoch": 0.6029902384776967, + "grad_norm": 1.060220470914707, + "learning_rate": 1.6747782328440044e-05, + "loss": 0.3366449773311615, + "step": 1220 + }, + { + "epoch": 0.6034844927715309, + "grad_norm": 1.2656940979343048, + "learning_rate": 1.674175001186741e-05, + "loss": 0.4027010500431061, + "step": 1221 + }, + { + "epoch": 0.6039787470653651, + "grad_norm": 1.039989374871811, + "learning_rate": 1.6735713194598798e-05, + "loss": 0.31566083431243896, + "step": 1222 + }, + { + "epoch": 0.6044730013591993, + "grad_norm": 1.1667815915058346, + "learning_rate": 1.67296718806643e-05, + "loss": 0.3361780047416687, + "step": 1223 + }, + { + "epoch": 0.6049672556530334, + "grad_norm": 1.0628494144880791, + "learning_rate": 1.6723626074097007e-05, + "loss": 0.3197939693927765, + "step": 1224 + }, + { + "epoch": 0.6054615099468676, + "grad_norm": 1.078571350485402, + "learning_rate": 1.671757577893302e-05, + "loss": 0.32977360486984253, + "step": 1225 + }, + { + "epoch": 0.6059557642407019, + "grad_norm": 1.1192119082687915, + "learning_rate": 1.671152099921142e-05, + "loss": 0.3434401750564575, + "step": 1226 + }, + { + "epoch": 0.6064500185345361, + "grad_norm": 1.0664877094913836, + "learning_rate": 1.67054617389743e-05, + "loss": 0.33856305480003357, + "step": 1227 + }, + { + "epoch": 0.6069442728283702, + "grad_norm": 1.147959053573069, + "learning_rate": 
1.669939800226673e-05, + "loss": 0.31594911217689514, + "step": 1228 + }, + { + "epoch": 0.6074385271222044, + "grad_norm": 1.105417739927691, + "learning_rate": 1.669332979313678e-05, + "loss": 0.32347679138183594, + "step": 1229 + }, + { + "epoch": 0.6079327814160386, + "grad_norm": 1.1057400329817928, + "learning_rate": 1.6687257115635492e-05, + "loss": 0.32733607292175293, + "step": 1230 + }, + { + "epoch": 0.6084270357098728, + "grad_norm": 0.9869005136013326, + "learning_rate": 1.6681179973816908e-05, + "loss": 0.306827187538147, + "step": 1231 + }, + { + "epoch": 0.6089212900037069, + "grad_norm": 1.068802395839477, + "learning_rate": 1.667509837173803e-05, + "loss": 0.3515884280204773, + "step": 1232 + }, + { + "epoch": 0.6094155442975411, + "grad_norm": 1.0062662165973097, + "learning_rate": 1.6669012313458862e-05, + "loss": 0.28699082136154175, + "step": 1233 + }, + { + "epoch": 0.6099097985913753, + "grad_norm": 1.0697164166178312, + "learning_rate": 1.6662921803042356e-05, + "loss": 0.30737537145614624, + "step": 1234 + }, + { + "epoch": 0.6104040528852095, + "grad_norm": 1.0782793991023802, + "learning_rate": 1.665682684455446e-05, + "loss": 0.3193345069885254, + "step": 1235 + }, + { + "epoch": 0.6108983071790436, + "grad_norm": 1.1629258901733988, + "learning_rate": 1.6650727442064073e-05, + "loss": 0.3326336741447449, + "step": 1236 + }, + { + "epoch": 0.6113925614728778, + "grad_norm": 1.0950813589125916, + "learning_rate": 1.6644623599643076e-05, + "loss": 0.2967267632484436, + "step": 1237 + }, + { + "epoch": 0.611886815766712, + "grad_norm": 1.104366364956542, + "learning_rate": 1.66385153213663e-05, + "loss": 0.3163914084434509, + "step": 1238 + }, + { + "epoch": 0.6123810700605461, + "grad_norm": 1.1913476484695409, + "learning_rate": 1.663240261131155e-05, + "loss": 0.40281808376312256, + "step": 1239 + }, + { + "epoch": 0.6128753243543803, + "grad_norm": 1.1744917859448287, + "learning_rate": 1.6626285473559586e-05, + "loss": 
0.33946287631988525, + "step": 1240 + }, + { + "epoch": 0.6133695786482145, + "grad_norm": 1.121011060895708, + "learning_rate": 1.6620163912194114e-05, + "loss": 0.3750913143157959, + "step": 1241 + }, + { + "epoch": 0.6138638329420487, + "grad_norm": 1.1601773319994575, + "learning_rate": 1.6614037931301804e-05, + "loss": 0.32449400424957275, + "step": 1242 + }, + { + "epoch": 0.6143580872358828, + "grad_norm": 1.146035054497973, + "learning_rate": 1.6607907534972277e-05, + "loss": 0.3484799861907959, + "step": 1243 + }, + { + "epoch": 0.614852341529717, + "grad_norm": 1.0478699674323781, + "learning_rate": 1.6601772727298095e-05, + "loss": 0.2991127669811249, + "step": 1244 + }, + { + "epoch": 0.6153465958235512, + "grad_norm": 1.0941316253076903, + "learning_rate": 1.6595633512374768e-05, + "loss": 0.339094340801239, + "step": 1245 + }, + { + "epoch": 0.6158408501173854, + "grad_norm": 1.0756027047064132, + "learning_rate": 1.6589489894300744e-05, + "loss": 0.3147842288017273, + "step": 1246 + }, + { + "epoch": 0.6163351044112195, + "grad_norm": 1.0944450465347566, + "learning_rate": 1.6583341877177427e-05, + "loss": 0.3036183714866638, + "step": 1247 + }, + { + "epoch": 0.6168293587050537, + "grad_norm": 1.0983853525092009, + "learning_rate": 1.657718946510913e-05, + "loss": 0.32657095789909363, + "step": 1248 + }, + { + "epoch": 0.6173236129988879, + "grad_norm": 1.0660730573251251, + "learning_rate": 1.6571032662203126e-05, + "loss": 0.3104664385318756, + "step": 1249 + }, + { + "epoch": 0.6178178672927221, + "grad_norm": 1.0675015064613533, + "learning_rate": 1.6564871472569604e-05, + "loss": 0.30392807722091675, + "step": 1250 + }, + { + "epoch": 0.6183121215865562, + "grad_norm": 1.080894190005694, + "learning_rate": 1.655870590032169e-05, + "loss": 0.3087356388568878, + "step": 1251 + }, + { + "epoch": 0.6188063758803904, + "grad_norm": 1.0633256442775108, + "learning_rate": 1.6552535949575427e-05, + "loss": 0.3220480978488922, + "step": 1252 + }, + { + 
"epoch": 0.6193006301742247, + "grad_norm": 1.0867949301055795, + "learning_rate": 1.654636162444979e-05, + "loss": 0.33925485610961914, + "step": 1253 + }, + { + "epoch": 0.6197948844680589, + "grad_norm": 1.0651223448844926, + "learning_rate": 1.6540182929066667e-05, + "loss": 0.3704617917537689, + "step": 1254 + }, + { + "epoch": 0.620289138761893, + "grad_norm": 1.1158405395395257, + "learning_rate": 1.653399986755087e-05, + "loss": 0.33745670318603516, + "step": 1255 + }, + { + "epoch": 0.6207833930557272, + "grad_norm": 1.1397943957058634, + "learning_rate": 1.6527812444030118e-05, + "loss": 0.31651467084884644, + "step": 1256 + }, + { + "epoch": 0.6212776473495614, + "grad_norm": 1.141112365152985, + "learning_rate": 1.6521620662635053e-05, + "loss": 0.360455185174942, + "step": 1257 + }, + { + "epoch": 0.6217719016433956, + "grad_norm": 1.0000307812773819, + "learning_rate": 1.6515424527499214e-05, + "loss": 0.32819390296936035, + "step": 1258 + }, + { + "epoch": 0.6222661559372297, + "grad_norm": 1.229539015248975, + "learning_rate": 1.6509224042759053e-05, + "loss": 0.38759690523147583, + "step": 1259 + }, + { + "epoch": 0.6227604102310639, + "grad_norm": 1.127403937815861, + "learning_rate": 1.6503019212553932e-05, + "loss": 0.34250545501708984, + "step": 1260 + }, + { + "epoch": 0.6232546645248981, + "grad_norm": 1.0060644367410545, + "learning_rate": 1.6496810041026097e-05, + "loss": 0.3120163679122925, + "step": 1261 + }, + { + "epoch": 0.6237489188187323, + "grad_norm": 1.1050188267024101, + "learning_rate": 1.649059653232071e-05, + "loss": 0.35985836386680603, + "step": 1262 + }, + { + "epoch": 0.6242431731125664, + "grad_norm": 1.0877426950647728, + "learning_rate": 1.648437869058581e-05, + "loss": 0.3551288843154907, + "step": 1263 + }, + { + "epoch": 0.6247374274064006, + "grad_norm": 1.095568415742879, + "learning_rate": 1.6478156519972354e-05, + "loss": 0.33047816157341003, + "step": 1264 + }, + { + "epoch": 0.6252316817002348, + "grad_norm": 
1.0643242802432207, + "learning_rate": 1.6471930024634164e-05, + "loss": 0.32909417152404785, + "step": 1265 + }, + { + "epoch": 0.6257259359940689, + "grad_norm": 1.07195158812182, + "learning_rate": 1.6465699208727964e-05, + "loss": 0.3726924657821655, + "step": 1266 + }, + { + "epoch": 0.6262201902879031, + "grad_norm": 1.1316893144153, + "learning_rate": 1.6459464076413355e-05, + "loss": 0.3569204807281494, + "step": 1267 + }, + { + "epoch": 0.6267144445817373, + "grad_norm": 1.0125649890138406, + "learning_rate": 1.6453224631852825e-05, + "loss": 0.33798107504844666, + "step": 1268 + }, + { + "epoch": 0.6272086988755715, + "grad_norm": 1.1537944647220344, + "learning_rate": 1.644698087921173e-05, + "loss": 0.32891637086868286, + "step": 1269 + }, + { + "epoch": 0.6277029531694056, + "grad_norm": 1.1246833616649612, + "learning_rate": 1.644073282265832e-05, + "loss": 0.31512969732284546, + "step": 1270 + }, + { + "epoch": 0.6281972074632398, + "grad_norm": 1.1199823464164773, + "learning_rate": 1.643448046636371e-05, + "loss": 0.350041925907135, + "step": 1271 + }, + { + "epoch": 0.628691461757074, + "grad_norm": 1.0925989435954497, + "learning_rate": 1.642822381450187e-05, + "loss": 0.3248854875564575, + "step": 1272 + }, + { + "epoch": 0.6291857160509082, + "grad_norm": 1.0344569444697491, + "learning_rate": 1.6421962871249662e-05, + "loss": 0.3031661808490753, + "step": 1273 + }, + { + "epoch": 0.6296799703447423, + "grad_norm": 1.0843035546126185, + "learning_rate": 1.6415697640786802e-05, + "loss": 0.2903754711151123, + "step": 1274 + }, + { + "epoch": 0.6301742246385765, + "grad_norm": 1.0122518499053432, + "learning_rate": 1.6409428127295864e-05, + "loss": 0.300454318523407, + "step": 1275 + }, + { + "epoch": 0.6306684789324107, + "grad_norm": 1.0842968830814483, + "learning_rate": 1.6403154334962286e-05, + "loss": 0.3430244028568268, + "step": 1276 + }, + { + "epoch": 0.6311627332262449, + "grad_norm": 1.1383634793407482, + "learning_rate": 
1.6396876267974367e-05, + "loss": 0.3728436827659607, + "step": 1277 + }, + { + "epoch": 0.631656987520079, + "grad_norm": 1.103371729978927, + "learning_rate": 1.639059393052325e-05, + "loss": 0.3021183907985687, + "step": 1278 + }, + { + "epoch": 0.6321512418139132, + "grad_norm": 1.0649900935701406, + "learning_rate": 1.6384307326802934e-05, + "loss": 0.3313615918159485, + "step": 1279 + }, + { + "epoch": 0.6326454961077475, + "grad_norm": 1.0519110395000262, + "learning_rate": 1.637801646101027e-05, + "loss": 0.32833239436149597, + "step": 1280 + }, + { + "epoch": 0.6331397504015817, + "grad_norm": 1.1672616485147485, + "learning_rate": 1.6371721337344947e-05, + "loss": 0.3575769066810608, + "step": 1281 + }, + { + "epoch": 0.6336340046954158, + "grad_norm": 1.044512245658177, + "learning_rate": 1.6365421960009502e-05, + "loss": 0.33323729038238525, + "step": 1282 + }, + { + "epoch": 0.63412825898925, + "grad_norm": 1.150185694461945, + "learning_rate": 1.6359118333209307e-05, + "loss": 0.3522900938987732, + "step": 1283 + }, + { + "epoch": 0.6346225132830842, + "grad_norm": 1.2143932108960407, + "learning_rate": 1.635281046115257e-05, + "loss": 0.3350796699523926, + "step": 1284 + }, + { + "epoch": 0.6351167675769184, + "grad_norm": 1.2071815938700088, + "learning_rate": 1.6346498348050342e-05, + "loss": 0.350632905960083, + "step": 1285 + }, + { + "epoch": 0.6356110218707525, + "grad_norm": 1.0108749382306044, + "learning_rate": 1.6340181998116494e-05, + "loss": 0.2961253523826599, + "step": 1286 + }, + { + "epoch": 0.6361052761645867, + "grad_norm": 1.3686468141070485, + "learning_rate": 1.6333861415567736e-05, + "loss": 0.35736170411109924, + "step": 1287 + }, + { + "epoch": 0.6365995304584209, + "grad_norm": 1.1749750672779442, + "learning_rate": 1.63275366046236e-05, + "loss": 0.35654571652412415, + "step": 1288 + }, + { + "epoch": 0.6370937847522551, + "grad_norm": 1.0658003578898634, + "learning_rate": 1.6321207569506435e-05, + "loss": 
0.30518224835395813, + "step": 1289 + }, + { + "epoch": 0.6375880390460892, + "grad_norm": 1.1007851387105425, + "learning_rate": 1.6314874314441413e-05, + "loss": 0.35099470615386963, + "step": 1290 + }, + { + "epoch": 0.6380822933399234, + "grad_norm": 1.0971286067217327, + "learning_rate": 1.6308536843656528e-05, + "loss": 0.3577536344528198, + "step": 1291 + }, + { + "epoch": 0.6385765476337576, + "grad_norm": 1.0395121014513669, + "learning_rate": 1.6302195161382586e-05, + "loss": 0.3141167163848877, + "step": 1292 + }, + { + "epoch": 0.6390708019275917, + "grad_norm": 0.981608659730199, + "learning_rate": 1.62958492718532e-05, + "loss": 0.2920055389404297, + "step": 1293 + }, + { + "epoch": 0.6395650562214259, + "grad_norm": 1.0875768517352407, + "learning_rate": 1.6289499179304797e-05, + "loss": 0.32826486229896545, + "step": 1294 + }, + { + "epoch": 0.6400593105152601, + "grad_norm": 1.0051851075633542, + "learning_rate": 1.628314488797661e-05, + "loss": 0.3080480992794037, + "step": 1295 + }, + { + "epoch": 0.6405535648090943, + "grad_norm": 1.006537470660458, + "learning_rate": 1.627678640211067e-05, + "loss": 0.304529070854187, + "step": 1296 + }, + { + "epoch": 0.6410478191029284, + "grad_norm": 1.1108978139615113, + "learning_rate": 1.627042372595181e-05, + "loss": 0.34653496742248535, + "step": 1297 + }, + { + "epoch": 0.6415420733967626, + "grad_norm": 0.9745027779333038, + "learning_rate": 1.6264056863747667e-05, + "loss": 0.2938673496246338, + "step": 1298 + }, + { + "epoch": 0.6420363276905968, + "grad_norm": 1.1585281714148792, + "learning_rate": 1.625768581974866e-05, + "loss": 0.32350343465805054, + "step": 1299 + }, + { + "epoch": 0.642530581984431, + "grad_norm": 1.0756982630474194, + "learning_rate": 1.6251310598208015e-05, + "loss": 0.3175384998321533, + "step": 1300 + }, + { + "epoch": 0.6430248362782651, + "grad_norm": 1.1335110071944674, + "learning_rate": 1.6244931203381734e-05, + "loss": 0.32667648792266846, + "step": 1301 + }, + { + 
"epoch": 0.6435190905720993, + "grad_norm": 0.9986052180267636, + "learning_rate": 1.623854763952861e-05, + "loss": 0.30110976099967957, + "step": 1302 + }, + { + "epoch": 0.6440133448659335, + "grad_norm": 1.2219754266907614, + "learning_rate": 1.6232159910910224e-05, + "loss": 0.3508617579936981, + "step": 1303 + }, + { + "epoch": 0.6445075991597677, + "grad_norm": 1.1027211796126624, + "learning_rate": 1.622576802179092e-05, + "loss": 0.34416183829307556, + "step": 1304 + }, + { + "epoch": 0.6450018534536018, + "grad_norm": 1.1267200023483468, + "learning_rate": 1.6219371976437847e-05, + "loss": 0.3509306311607361, + "step": 1305 + }, + { + "epoch": 0.645496107747436, + "grad_norm": 1.1746524244290708, + "learning_rate": 1.6212971779120904e-05, + "loss": 0.36186683177948, + "step": 1306 + }, + { + "epoch": 0.6459903620412702, + "grad_norm": 1.128374133277422, + "learning_rate": 1.6206567434112776e-05, + "loss": 0.3123924732208252, + "step": 1307 + }, + { + "epoch": 0.6464846163351045, + "grad_norm": 1.2141772034453755, + "learning_rate": 1.6200158945688907e-05, + "loss": 0.3691411018371582, + "step": 1308 + }, + { + "epoch": 0.6469788706289386, + "grad_norm": 1.1011618758034853, + "learning_rate": 1.6193746318127516e-05, + "loss": 0.3136986792087555, + "step": 1309 + }, + { + "epoch": 0.6474731249227728, + "grad_norm": 1.0883839992045683, + "learning_rate": 1.6187329555709585e-05, + "loss": 0.30374211072921753, + "step": 1310 + }, + { + "epoch": 0.647967379216607, + "grad_norm": 1.207837369942263, + "learning_rate": 1.618090866271884e-05, + "loss": 0.3633323907852173, + "step": 1311 + }, + { + "epoch": 0.6484616335104412, + "grad_norm": 1.056749654034174, + "learning_rate": 1.6174483643441795e-05, + "loss": 0.31395208835601807, + "step": 1312 + }, + { + "epoch": 0.6489558878042753, + "grad_norm": 1.0312943002596973, + "learning_rate": 1.6168054502167687e-05, + "loss": 0.29258471727371216, + "step": 1313 + }, + { + "epoch": 0.6494501420981095, + "grad_norm": 
1.052844702612926, + "learning_rate": 1.6161621243188528e-05, + "loss": 0.3086007833480835, + "step": 1314 + }, + { + "epoch": 0.6499443963919437, + "grad_norm": 1.1099907156572013, + "learning_rate": 1.6155183870799063e-05, + "loss": 0.3604614734649658, + "step": 1315 + }, + { + "epoch": 0.6504386506857779, + "grad_norm": 1.230657559418624, + "learning_rate": 1.614874238929679e-05, + "loss": 0.3784678876399994, + "step": 1316 + }, + { + "epoch": 0.650932904979612, + "grad_norm": 0.9692609071600233, + "learning_rate": 1.6142296802981957e-05, + "loss": 0.29009610414505005, + "step": 1317 + }, + { + "epoch": 0.6514271592734462, + "grad_norm": 1.1385261282180998, + "learning_rate": 1.6135847116157542e-05, + "loss": 0.3667104244232178, + "step": 1318 + }, + { + "epoch": 0.6519214135672804, + "grad_norm": 1.0454111919656257, + "learning_rate": 1.6129393333129262e-05, + "loss": 0.3100985884666443, + "step": 1319 + }, + { + "epoch": 0.6524156678611145, + "grad_norm": 1.0967001531345488, + "learning_rate": 1.612293545820557e-05, + "loss": 0.34128522872924805, + "step": 1320 + }, + { + "epoch": 0.6529099221549487, + "grad_norm": 1.016572733864691, + "learning_rate": 1.611647349569765e-05, + "loss": 0.3017216920852661, + "step": 1321 + }, + { + "epoch": 0.6534041764487829, + "grad_norm": 1.0979244854260226, + "learning_rate": 1.611000744991942e-05, + "loss": 0.35060590505599976, + "step": 1322 + }, + { + "epoch": 0.6538984307426171, + "grad_norm": 1.180855026456707, + "learning_rate": 1.610353732518752e-05, + "loss": 0.3766549825668335, + "step": 1323 + }, + { + "epoch": 0.6543926850364512, + "grad_norm": 0.9954937284294141, + "learning_rate": 1.609706312582131e-05, + "loss": 0.2970678210258484, + "step": 1324 + }, + { + "epoch": 0.6548869393302854, + "grad_norm": 1.2407304893003468, + "learning_rate": 1.609058485614287e-05, + "loss": 0.3345789909362793, + "step": 1325 + }, + { + "epoch": 0.6553811936241196, + "grad_norm": 1.159801774337048, + "learning_rate": 
1.608410252047701e-05, + "loss": 0.34838157892227173, + "step": 1326 + }, + { + "epoch": 0.6558754479179538, + "grad_norm": 1.052743453114199, + "learning_rate": 1.6077616123151232e-05, + "loss": 0.27454087138175964, + "step": 1327 + }, + { + "epoch": 0.6563697022117879, + "grad_norm": 1.1304513457691607, + "learning_rate": 1.607112566849577e-05, + "loss": 0.3372647762298584, + "step": 1328 + }, + { + "epoch": 0.6568639565056221, + "grad_norm": 1.1678098502989476, + "learning_rate": 1.606463116084356e-05, + "loss": 0.34433993697166443, + "step": 1329 + }, + { + "epoch": 0.6573582107994563, + "grad_norm": 1.0760327464429003, + "learning_rate": 1.6058132604530242e-05, + "loss": 0.3267759382724762, + "step": 1330 + }, + { + "epoch": 0.6578524650932905, + "grad_norm": 1.044029067228307, + "learning_rate": 1.6051630003894155e-05, + "loss": 0.3022347390651703, + "step": 1331 + }, + { + "epoch": 0.6583467193871246, + "grad_norm": 1.0701124312590375, + "learning_rate": 1.604512336327634e-05, + "loss": 0.32478266954421997, + "step": 1332 + }, + { + "epoch": 0.6588409736809588, + "grad_norm": 1.1194211733981758, + "learning_rate": 1.6038612687020548e-05, + "loss": 0.32039204239845276, + "step": 1333 + }, + { + "epoch": 0.659335227974793, + "grad_norm": 1.189072572166891, + "learning_rate": 1.6032097979473203e-05, + "loss": 0.3376410901546478, + "step": 1334 + }, + { + "epoch": 0.6598294822686273, + "grad_norm": 1.0209465387535948, + "learning_rate": 1.6025579244983443e-05, + "loss": 0.28432029485702515, + "step": 1335 + }, + { + "epoch": 0.6603237365624613, + "grad_norm": 1.1101085579973957, + "learning_rate": 1.6019056487903067e-05, + "loss": 0.3349001109600067, + "step": 1336 + }, + { + "epoch": 0.6608179908562956, + "grad_norm": 1.016991018325495, + "learning_rate": 1.601252971258658e-05, + "loss": 0.27995598316192627, + "step": 1337 + }, + { + "epoch": 0.6613122451501298, + "grad_norm": 1.0652875110729838, + "learning_rate": 1.6005998923391172e-05, + "loss": 
0.28326892852783203, + "step": 1338 + }, + { + "epoch": 0.661806499443964, + "grad_norm": 1.1089400050162956, + "learning_rate": 1.5999464124676697e-05, + "loss": 0.3139200806617737, + "step": 1339 + }, + { + "epoch": 0.6623007537377981, + "grad_norm": 1.0857703956199403, + "learning_rate": 1.5992925320805688e-05, + "loss": 0.32395505905151367, + "step": 1340 + }, + { + "epoch": 0.6627950080316323, + "grad_norm": 1.187400707476865, + "learning_rate": 1.598638251614337e-05, + "loss": 0.35880255699157715, + "step": 1341 + }, + { + "epoch": 0.6632892623254665, + "grad_norm": 1.1264632686384342, + "learning_rate": 1.5979835715057616e-05, + "loss": 0.3696775436401367, + "step": 1342 + }, + { + "epoch": 0.6637835166193007, + "grad_norm": 1.2084738763641774, + "learning_rate": 1.597328492191898e-05, + "loss": 0.38413193821907043, + "step": 1343 + }, + { + "epoch": 0.6642777709131348, + "grad_norm": 2.0572947223290017, + "learning_rate": 1.596673014110068e-05, + "loss": 0.3564830720424652, + "step": 1344 + }, + { + "epoch": 0.664772025206969, + "grad_norm": 1.0170026931569898, + "learning_rate": 1.5960171376978587e-05, + "loss": 0.30634552240371704, + "step": 1345 + }, + { + "epoch": 0.6652662795008032, + "grad_norm": 1.0375692111937291, + "learning_rate": 1.595360863393125e-05, + "loss": 0.27113068103790283, + "step": 1346 + }, + { + "epoch": 0.6657605337946373, + "grad_norm": 1.242773829739391, + "learning_rate": 1.594704191633985e-05, + "loss": 0.34015512466430664, + "step": 1347 + }, + { + "epoch": 0.6662547880884715, + "grad_norm": 0.9724222230737607, + "learning_rate": 1.594047122858824e-05, + "loss": 0.2509229779243469, + "step": 1348 + }, + { + "epoch": 0.6667490423823057, + "grad_norm": 1.0705371704599513, + "learning_rate": 1.5933896575062922e-05, + "loss": 0.35122111439704895, + "step": 1349 + }, + { + "epoch": 0.6672432966761399, + "grad_norm": 1.0469402955634624, + "learning_rate": 1.592731796015303e-05, + "loss": 0.3656314015388489, + "step": 1350 + }, + { + 
"epoch": 0.667737550969974, + "grad_norm": 1.0980190562444532, + "learning_rate": 1.5920735388250363e-05, + "loss": 0.3482551574707031, + "step": 1351 + }, + { + "epoch": 0.6682318052638082, + "grad_norm": 0.9987728958846398, + "learning_rate": 1.5914148863749344e-05, + "loss": 0.2852175831794739, + "step": 1352 + }, + { + "epoch": 0.6687260595576424, + "grad_norm": 1.1231968462948256, + "learning_rate": 1.590755839104705e-05, + "loss": 0.3435940742492676, + "step": 1353 + }, + { + "epoch": 0.6692203138514766, + "grad_norm": 1.2334019463480403, + "learning_rate": 1.590096397454318e-05, + "loss": 0.34816527366638184, + "step": 1354 + }, + { + "epoch": 0.6697145681453107, + "grad_norm": 1.4472355399081582, + "learning_rate": 1.5894365618640077e-05, + "loss": 0.3283170461654663, + "step": 1355 + }, + { + "epoch": 0.6702088224391449, + "grad_norm": 1.1520168978191874, + "learning_rate": 1.588776332774271e-05, + "loss": 0.335905522108078, + "step": 1356 + }, + { + "epoch": 0.6707030767329791, + "grad_norm": 1.1244736910598108, + "learning_rate": 1.5881157106258666e-05, + "loss": 0.3055316209793091, + "step": 1357 + }, + { + "epoch": 0.6711973310268133, + "grad_norm": 1.050666765324263, + "learning_rate": 1.5874546958598172e-05, + "loss": 0.2873142659664154, + "step": 1358 + }, + { + "epoch": 0.6716915853206474, + "grad_norm": 1.0218331884680711, + "learning_rate": 1.586793288917406e-05, + "loss": 0.29659712314605713, + "step": 1359 + }, + { + "epoch": 0.6721858396144816, + "grad_norm": 1.0827802259474617, + "learning_rate": 1.5861314902401802e-05, + "loss": 0.33081990480422974, + "step": 1360 + }, + { + "epoch": 0.6726800939083158, + "grad_norm": 1.2140107638410536, + "learning_rate": 1.5854693002699457e-05, + "loss": 0.3559015691280365, + "step": 1361 + }, + { + "epoch": 0.67317434820215, + "grad_norm": 1.1424828520826207, + "learning_rate": 1.584806719448772e-05, + "loss": 0.3353438973426819, + "step": 1362 + }, + { + "epoch": 0.6736686024959841, + "grad_norm": 
1.0533009951881467, + "learning_rate": 1.5841437482189882e-05, + "loss": 0.3320685923099518, + "step": 1363 + }, + { + "epoch": 0.6741628567898184, + "grad_norm": 1.0600254033440624, + "learning_rate": 1.5834803870231846e-05, + "loss": 0.3070179224014282, + "step": 1364 + }, + { + "epoch": 0.6746571110836526, + "grad_norm": 1.0452219544938475, + "learning_rate": 1.5828166363042115e-05, + "loss": 0.28779780864715576, + "step": 1365 + }, + { + "epoch": 0.6751513653774868, + "grad_norm": 0.9932658974656241, + "learning_rate": 1.5821524965051793e-05, + "loss": 0.2793114185333252, + "step": 1366 + }, + { + "epoch": 0.6756456196713209, + "grad_norm": 1.117744874079583, + "learning_rate": 1.5814879680694585e-05, + "loss": 0.3586357831954956, + "step": 1367 + }, + { + "epoch": 0.6761398739651551, + "grad_norm": 1.122494918770383, + "learning_rate": 1.5808230514406786e-05, + "loss": 0.35258832573890686, + "step": 1368 + }, + { + "epoch": 0.6766341282589893, + "grad_norm": 1.0624893424167818, + "learning_rate": 1.5801577470627286e-05, + "loss": 0.2783607840538025, + "step": 1369 + }, + { + "epoch": 0.6771283825528235, + "grad_norm": 1.217710803865883, + "learning_rate": 1.579492055379756e-05, + "loss": 0.3494858741760254, + "step": 1370 + }, + { + "epoch": 0.6776226368466576, + "grad_norm": 1.1913846811426898, + "learning_rate": 1.578825976836167e-05, + "loss": 0.34512561559677124, + "step": 1371 + }, + { + "epoch": 0.6781168911404918, + "grad_norm": 1.0303182849177774, + "learning_rate": 1.5781595118766265e-05, + "loss": 0.2923341989517212, + "step": 1372 + }, + { + "epoch": 0.678611145434326, + "grad_norm": 1.0423481220482165, + "learning_rate": 1.5774926609460566e-05, + "loss": 0.3078833818435669, + "step": 1373 + }, + { + "epoch": 0.6791053997281601, + "grad_norm": 1.0871141007271816, + "learning_rate": 1.576825424489638e-05, + "loss": 0.3147008419036865, + "step": 1374 + }, + { + "epoch": 0.6795996540219943, + "grad_norm": 1.0340836184197277, + "learning_rate": 
1.576157802952807e-05, + "loss": 0.2907789349555969, + "step": 1375 + }, + { + "epoch": 0.6800939083158285, + "grad_norm": 1.1801114991913197, + "learning_rate": 1.57548979678126e-05, + "loss": 0.2941555976867676, + "step": 1376 + }, + { + "epoch": 0.6805881626096627, + "grad_norm": 1.137398706652914, + "learning_rate": 1.5748214064209473e-05, + "loss": 0.3452342748641968, + "step": 1377 + }, + { + "epoch": 0.6810824169034968, + "grad_norm": 0.9870368606552603, + "learning_rate": 1.5741526323180765e-05, + "loss": 0.31481361389160156, + "step": 1378 + }, + { + "epoch": 0.681576671197331, + "grad_norm": 1.1734004344416635, + "learning_rate": 1.573483474919112e-05, + "loss": 0.3403349220752716, + "step": 1379 + }, + { + "epoch": 0.6820709254911652, + "grad_norm": 1.3661262290783491, + "learning_rate": 1.572813934670774e-05, + "loss": 0.3283364176750183, + "step": 1380 + }, + { + "epoch": 0.6825651797849994, + "grad_norm": 1.0790334315781973, + "learning_rate": 1.5721440120200376e-05, + "loss": 0.3294883966445923, + "step": 1381 + }, + { + "epoch": 0.6830594340788335, + "grad_norm": 1.057215667272423, + "learning_rate": 1.5714737074141338e-05, + "loss": 0.3087981343269348, + "step": 1382 + }, + { + "epoch": 0.6835536883726677, + "grad_norm": 0.9953380542206125, + "learning_rate": 1.570803021300548e-05, + "loss": 0.29511693120002747, + "step": 1383 + }, + { + "epoch": 0.6840479426665019, + "grad_norm": 1.1147415286539601, + "learning_rate": 1.570131954127021e-05, + "loss": 0.3620823323726654, + "step": 1384 + }, + { + "epoch": 0.6845421969603361, + "grad_norm": 1.2518358127130127, + "learning_rate": 1.5694605063415477e-05, + "loss": 0.3978300988674164, + "step": 1385 + }, + { + "epoch": 0.6850364512541702, + "grad_norm": 1.2104388988265296, + "learning_rate": 1.5687886783923773e-05, + "loss": 0.35367661714553833, + "step": 1386 + }, + { + "epoch": 0.6855307055480044, + "grad_norm": 1.158470270474232, + "learning_rate": 1.5681164707280117e-05, + "loss": 
0.3313448131084442, + "step": 1387 + }, + { + "epoch": 0.6860249598418386, + "grad_norm": 1.1312206183637163, + "learning_rate": 1.5674438837972077e-05, + "loss": 0.34115713834762573, + "step": 1388 + }, + { + "epoch": 0.6865192141356729, + "grad_norm": 1.071906380475402, + "learning_rate": 1.566770918048975e-05, + "loss": 0.311326265335083, + "step": 1389 + }, + { + "epoch": 0.687013468429507, + "grad_norm": 1.0496646406815568, + "learning_rate": 1.5660975739325755e-05, + "loss": 0.32622700929641724, + "step": 1390 + }, + { + "epoch": 0.6875077227233412, + "grad_norm": 1.1530479303397307, + "learning_rate": 1.565423851897524e-05, + "loss": 0.36029747128486633, + "step": 1391 + }, + { + "epoch": 0.6880019770171754, + "grad_norm": 0.9691306195768644, + "learning_rate": 1.5647497523935883e-05, + "loss": 0.2771177291870117, + "step": 1392 + }, + { + "epoch": 0.6884962313110096, + "grad_norm": 1.1450942478438548, + "learning_rate": 1.5640752758707868e-05, + "loss": 0.3474002182483673, + "step": 1393 + }, + { + "epoch": 0.6889904856048437, + "grad_norm": 1.09850595363495, + "learning_rate": 1.563400422779391e-05, + "loss": 0.28006255626678467, + "step": 1394 + }, + { + "epoch": 0.6894847398986779, + "grad_norm": 1.0953635794573913, + "learning_rate": 1.562725193569923e-05, + "loss": 0.32151490449905396, + "step": 1395 + }, + { + "epoch": 0.6899789941925121, + "grad_norm": 1.1995785901348681, + "learning_rate": 1.5620495886931557e-05, + "loss": 0.3081187903881073, + "step": 1396 + }, + { + "epoch": 0.6904732484863463, + "grad_norm": 1.1390576796125735, + "learning_rate": 1.561373608600114e-05, + "loss": 0.3158992826938629, + "step": 1397 + }, + { + "epoch": 0.6909675027801804, + "grad_norm": 1.1783652693752096, + "learning_rate": 1.5606972537420723e-05, + "loss": 0.33790335059165955, + "step": 1398 + }, + { + "epoch": 0.6914617570740146, + "grad_norm": 1.1733705340509706, + "learning_rate": 1.5600205245705553e-05, + "loss": 0.3157292902469635, + "step": 1399 + }, + { + 
"epoch": 0.6919560113678488, + "grad_norm": 1.1674234642263648, + "learning_rate": 1.559343421537338e-05, + "loss": 0.31090572476387024, + "step": 1400 + }, + { + "epoch": 0.6924502656616829, + "grad_norm": 1.1604041250760992, + "learning_rate": 1.5586659450944443e-05, + "loss": 0.30499958992004395, + "step": 1401 + }, + { + "epoch": 0.6929445199555171, + "grad_norm": 1.0713722972416724, + "learning_rate": 1.5579880956941478e-05, + "loss": 0.3036794662475586, + "step": 1402 + }, + { + "epoch": 0.6934387742493513, + "grad_norm": 1.1543376848490539, + "learning_rate": 1.5573098737889716e-05, + "loss": 0.26514700055122375, + "step": 1403 + }, + { + "epoch": 0.6939330285431855, + "grad_norm": 1.0755683699565965, + "learning_rate": 1.5566312798316867e-05, + "loss": 0.31947457790374756, + "step": 1404 + }, + { + "epoch": 0.6944272828370196, + "grad_norm": 1.1317886658483896, + "learning_rate": 1.5559523142753124e-05, + "loss": 0.29387322068214417, + "step": 1405 + }, + { + "epoch": 0.6949215371308538, + "grad_norm": 1.117372828260635, + "learning_rate": 1.555272977573117e-05, + "loss": 0.33459946513175964, + "step": 1406 + }, + { + "epoch": 0.695415791424688, + "grad_norm": 1.2196871082649428, + "learning_rate": 1.5545932701786154e-05, + "loss": 0.31394320726394653, + "step": 1407 + }, + { + "epoch": 0.6959100457185222, + "grad_norm": 1.0669033993360486, + "learning_rate": 1.5539131925455713e-05, + "loss": 0.2891885042190552, + "step": 1408 + }, + { + "epoch": 0.6964043000123563, + "grad_norm": 1.2475463319045528, + "learning_rate": 1.5532327451279938e-05, + "loss": 0.33686599135398865, + "step": 1409 + }, + { + "epoch": 0.6968985543061905, + "grad_norm": 1.0648029492831064, + "learning_rate": 1.5525519283801405e-05, + "loss": 0.31463146209716797, + "step": 1410 + }, + { + "epoch": 0.6973928086000247, + "grad_norm": 1.226099759538899, + "learning_rate": 1.5518707427565146e-05, + "loss": 0.3598940372467041, + "step": 1411 + }, + { + "epoch": 0.6978870628938589, + 
"grad_norm": 1.149083094787804, + "learning_rate": 1.5511891887118665e-05, + "loss": 0.32980066537857056, + "step": 1412 + }, + { + "epoch": 0.698381317187693, + "grad_norm": 1.1872142618250514, + "learning_rate": 1.5505072667011915e-05, + "loss": 0.3264961242675781, + "step": 1413 + }, + { + "epoch": 0.6988755714815272, + "grad_norm": 1.0604770012284015, + "learning_rate": 1.549824977179731e-05, + "loss": 0.3355519771575928, + "step": 1414 + }, + { + "epoch": 0.6993698257753614, + "grad_norm": 1.0119765938601295, + "learning_rate": 1.5491423206029717e-05, + "loss": 0.27073174715042114, + "step": 1415 + }, + { + "epoch": 0.6998640800691956, + "grad_norm": 1.1356545279602395, + "learning_rate": 1.5484592974266456e-05, + "loss": 0.32638323307037354, + "step": 1416 + }, + { + "epoch": 0.7003583343630297, + "grad_norm": 1.192307972564017, + "learning_rate": 1.5477759081067288e-05, + "loss": 0.38844019174575806, + "step": 1417 + }, + { + "epoch": 0.700852588656864, + "grad_norm": 1.1060104448967631, + "learning_rate": 1.5470921530994426e-05, + "loss": 0.3386498689651489, + "step": 1418 + }, + { + "epoch": 0.7013468429506982, + "grad_norm": 1.113333245203903, + "learning_rate": 1.5464080328612522e-05, + "loss": 0.3304392993450165, + "step": 1419 + }, + { + "epoch": 0.7018410972445324, + "grad_norm": 1.1024158772042199, + "learning_rate": 1.545723547848866e-05, + "loss": 0.314837247133255, + "step": 1420 + }, + { + "epoch": 0.7023353515383665, + "grad_norm": 0.9888192419219921, + "learning_rate": 1.5450386985192368e-05, + "loss": 0.30135127902030945, + "step": 1421 + }, + { + "epoch": 0.7028296058322007, + "grad_norm": 1.0640354824874358, + "learning_rate": 1.5443534853295602e-05, + "loss": 0.29176798462867737, + "step": 1422 + }, + { + "epoch": 0.7033238601260349, + "grad_norm": 1.3021824252266967, + "learning_rate": 1.5436679087372746e-05, + "loss": 0.36438125371932983, + "step": 1423 + }, + { + "epoch": 0.703818114419869, + "grad_norm": 1.1147780995478658, + 
"learning_rate": 1.542981969200061e-05, + "loss": 0.37140434980392456, + "step": 1424 + }, + { + "epoch": 0.7043123687137032, + "grad_norm": 1.3176538326023695, + "learning_rate": 1.542295667175843e-05, + "loss": 0.36072903871536255, + "step": 1425 + }, + { + "epoch": 0.7048066230075374, + "grad_norm": 1.1262882885574772, + "learning_rate": 1.5416090031227868e-05, + "loss": 0.3266327977180481, + "step": 1426 + }, + { + "epoch": 0.7053008773013716, + "grad_norm": 1.0179565917308762, + "learning_rate": 1.5409219774992978e-05, + "loss": 0.3081423342227936, + "step": 1427 + }, + { + "epoch": 0.7057951315952057, + "grad_norm": 1.3034313694807904, + "learning_rate": 1.5402345907640262e-05, + "loss": 0.3571197986602783, + "step": 1428 + }, + { + "epoch": 0.7062893858890399, + "grad_norm": 1.1385888315844002, + "learning_rate": 1.5395468433758604e-05, + "loss": 0.32380104064941406, + "step": 1429 + }, + { + "epoch": 0.7067836401828741, + "grad_norm": 1.0129718670355197, + "learning_rate": 1.5388587357939313e-05, + "loss": 0.33777546882629395, + "step": 1430 + }, + { + "epoch": 0.7072778944767083, + "grad_norm": 1.0997780610685683, + "learning_rate": 1.5381702684776093e-05, + "loss": 0.31793370842933655, + "step": 1431 + }, + { + "epoch": 0.7077721487705424, + "grad_norm": 1.065324744616134, + "learning_rate": 1.537481441886506e-05, + "loss": 0.3282355070114136, + "step": 1432 + }, + { + "epoch": 0.7082664030643766, + "grad_norm": 1.1740655706878367, + "learning_rate": 1.5367922564804716e-05, + "loss": 0.3523057699203491, + "step": 1433 + }, + { + "epoch": 0.7087606573582108, + "grad_norm": 1.1790295388685894, + "learning_rate": 1.5361027127195964e-05, + "loss": 0.36351460218429565, + "step": 1434 + }, + { + "epoch": 0.709254911652045, + "grad_norm": 2.2339320260763373, + "learning_rate": 1.5354128110642102e-05, + "loss": 0.2936401963233948, + "step": 1435 + }, + { + "epoch": 0.7097491659458791, + "grad_norm": 1.1080576186798932, + "learning_rate": 1.5347225519748818e-05, + 
"loss": 0.3178175091743469, + "step": 1436 + }, + { + "epoch": 0.7102434202397133, + "grad_norm": 1.1375761171495609, + "learning_rate": 1.5340319359124177e-05, + "loss": 0.3098832666873932, + "step": 1437 + }, + { + "epoch": 0.7107376745335475, + "grad_norm": 0.951807024133746, + "learning_rate": 1.5333409633378633e-05, + "loss": 0.2644941806793213, + "step": 1438 + }, + { + "epoch": 0.7112319288273817, + "grad_norm": 1.1193499530101132, + "learning_rate": 1.5326496347125027e-05, + "loss": 0.3046286702156067, + "step": 1439 + }, + { + "epoch": 0.7117261831212158, + "grad_norm": 1.1009971048909013, + "learning_rate": 1.5319579504978567e-05, + "loss": 0.33757925033569336, + "step": 1440 + }, + { + "epoch": 0.71222043741505, + "grad_norm": 1.1415644120008137, + "learning_rate": 1.5312659111556832e-05, + "loss": 0.3470202684402466, + "step": 1441 + }, + { + "epoch": 0.7127146917088842, + "grad_norm": 1.0829483976260892, + "learning_rate": 1.5305735171479785e-05, + "loss": 0.3310868740081787, + "step": 1442 + }, + { + "epoch": 0.7132089460027184, + "grad_norm": 1.2738694792524405, + "learning_rate": 1.529880768936975e-05, + "loss": 0.31649407744407654, + "step": 1443 + }, + { + "epoch": 0.7137032002965525, + "grad_norm": 1.0510301649062292, + "learning_rate": 1.5291876669851408e-05, + "loss": 0.2986135184764862, + "step": 1444 + }, + { + "epoch": 0.7141974545903867, + "grad_norm": 1.1622525691797543, + "learning_rate": 1.5284942117551817e-05, + "loss": 0.3033408224582672, + "step": 1445 + }, + { + "epoch": 0.714691708884221, + "grad_norm": 1.1648719329133883, + "learning_rate": 1.5278004037100378e-05, + "loss": 0.34231680631637573, + "step": 1446 + }, + { + "epoch": 0.7151859631780552, + "grad_norm": 1.1347301204641653, + "learning_rate": 1.5271062433128857e-05, + "loss": 0.3273579478263855, + "step": 1447 + }, + { + "epoch": 0.7156802174718893, + "grad_norm": 1.2307292916383785, + "learning_rate": 1.5264117310271372e-05, + "loss": 0.344064861536026, + "step": 1448 + 
}, + { + "epoch": 0.7161744717657235, + "grad_norm": 1.0685505855741966, + "learning_rate": 1.5257168673164384e-05, + "loss": 0.3131038546562195, + "step": 1449 + }, + { + "epoch": 0.7166687260595577, + "grad_norm": 1.1403948273488542, + "learning_rate": 1.5250216526446708e-05, + "loss": 0.32794755697250366, + "step": 1450 + }, + { + "epoch": 0.7171629803533918, + "grad_norm": 1.2597097116316462, + "learning_rate": 1.5243260874759494e-05, + "loss": 0.3633842468261719, + "step": 1451 + }, + { + "epoch": 0.717657234647226, + "grad_norm": 0.943013995379639, + "learning_rate": 1.5236301722746235e-05, + "loss": 0.24650251865386963, + "step": 1452 + }, + { + "epoch": 0.7181514889410602, + "grad_norm": 1.1777840335640666, + "learning_rate": 1.5229339075052769e-05, + "loss": 0.34167230129241943, + "step": 1453 + }, + { + "epoch": 0.7186457432348944, + "grad_norm": 1.0945051908887762, + "learning_rate": 1.522237293632725e-05, + "loss": 0.29454126954078674, + "step": 1454 + }, + { + "epoch": 0.7191399975287285, + "grad_norm": 1.1517995676673816, + "learning_rate": 1.5215403311220178e-05, + "loss": 0.3709314465522766, + "step": 1455 + }, + { + "epoch": 0.7196342518225627, + "grad_norm": 1.1421076533752808, + "learning_rate": 1.5208430204384377e-05, + "loss": 0.3543916642665863, + "step": 1456 + }, + { + "epoch": 0.7201285061163969, + "grad_norm": 1.1924648010793302, + "learning_rate": 1.5201453620474986e-05, + "loss": 0.33827707171440125, + "step": 1457 + }, + { + "epoch": 0.7206227604102311, + "grad_norm": 1.1616070041381745, + "learning_rate": 1.5194473564149484e-05, + "loss": 0.31289514899253845, + "step": 1458 + }, + { + "epoch": 0.7211170147040652, + "grad_norm": 1.1655875507968474, + "learning_rate": 1.5187490040067646e-05, + "loss": 0.3345657289028168, + "step": 1459 + }, + { + "epoch": 0.7216112689978994, + "grad_norm": 1.091971369166992, + "learning_rate": 1.5180503052891578e-05, + "loss": 0.3322404623031616, + "step": 1460 + }, + { + "epoch": 0.7221055232917336, + 
"grad_norm": 1.0009476128919939, + "learning_rate": 1.5173512607285692e-05, + "loss": 0.31120461225509644, + "step": 1461 + }, + { + "epoch": 0.7225997775855678, + "grad_norm": 1.140979323325151, + "learning_rate": 1.5166518707916714e-05, + "loss": 0.3388645648956299, + "step": 1462 + }, + { + "epoch": 0.7230940318794019, + "grad_norm": 1.098469502784105, + "learning_rate": 1.5159521359453661e-05, + "loss": 0.3048557639122009, + "step": 1463 + }, + { + "epoch": 0.7235882861732361, + "grad_norm": 1.0437743408474436, + "learning_rate": 1.5152520566567873e-05, + "loss": 0.32128047943115234, + "step": 1464 + }, + { + "epoch": 0.7240825404670703, + "grad_norm": 1.0754519434907805, + "learning_rate": 1.5145516333932973e-05, + "loss": 0.3016900420188904, + "step": 1465 + }, + { + "epoch": 0.7245767947609045, + "grad_norm": 0.9730419604339762, + "learning_rate": 1.5138508666224892e-05, + "loss": 0.27410340309143066, + "step": 1466 + }, + { + "epoch": 0.7250710490547386, + "grad_norm": 1.1548137674896846, + "learning_rate": 1.513149756812184e-05, + "loss": 0.314311146736145, + "step": 1467 + }, + { + "epoch": 0.7255653033485728, + "grad_norm": 1.0652992161056178, + "learning_rate": 1.5124483044304339e-05, + "loss": 0.300488144159317, + "step": 1468 + }, + { + "epoch": 0.726059557642407, + "grad_norm": 1.0437811199768454, + "learning_rate": 1.5117465099455173e-05, + "loss": 0.2610424757003784, + "step": 1469 + }, + { + "epoch": 0.7265538119362412, + "grad_norm": 1.0473843452456588, + "learning_rate": 1.5110443738259425e-05, + "loss": 0.2631368637084961, + "step": 1470 + }, + { + "epoch": 0.7270480662300753, + "grad_norm": 1.1572872923696271, + "learning_rate": 1.510341896540446e-05, + "loss": 0.2894716262817383, + "step": 1471 + }, + { + "epoch": 0.7275423205239095, + "grad_norm": 1.1539682565039295, + "learning_rate": 1.5096390785579913e-05, + "loss": 0.2859206199645996, + "step": 1472 + }, + { + "epoch": 0.7280365748177438, + "grad_norm": 1.1861776477785995, + 
"learning_rate": 1.5089359203477693e-05, + "loss": 0.2966008484363556, + "step": 1473 + }, + { + "epoch": 0.728530829111578, + "grad_norm": 1.0911088494470613, + "learning_rate": 1.5082324223791988e-05, + "loss": 0.3187675476074219, + "step": 1474 + }, + { + "epoch": 0.729025083405412, + "grad_norm": 1.1920802680772398, + "learning_rate": 1.507528585121925e-05, + "loss": 0.32434171438217163, + "step": 1475 + }, + { + "epoch": 0.7295193376992463, + "grad_norm": 1.233732485912319, + "learning_rate": 1.5068244090458197e-05, + "loss": 0.3518364429473877, + "step": 1476 + }, + { + "epoch": 0.7300135919930805, + "grad_norm": 1.091189612496036, + "learning_rate": 1.50611989462098e-05, + "loss": 0.32294291257858276, + "step": 1477 + }, + { + "epoch": 0.7305078462869146, + "grad_norm": 1.184027940449126, + "learning_rate": 1.5054150423177307e-05, + "loss": 0.3413415253162384, + "step": 1478 + }, + { + "epoch": 0.7310021005807488, + "grad_norm": 1.1760745568840743, + "learning_rate": 1.5047098526066207e-05, + "loss": 0.3562566637992859, + "step": 1479 + }, + { + "epoch": 0.731496354874583, + "grad_norm": 1.130494844464842, + "learning_rate": 1.504004325958424e-05, + "loss": 0.30018410086631775, + "step": 1480 + }, + { + "epoch": 0.7319906091684172, + "grad_norm": 1.027268124102698, + "learning_rate": 1.5032984628441409e-05, + "loss": 0.2937701344490051, + "step": 1481 + }, + { + "epoch": 0.7324848634622513, + "grad_norm": 1.131154387943882, + "learning_rate": 1.5025922637349953e-05, + "loss": 0.3268740773200989, + "step": 1482 + }, + { + "epoch": 0.7329791177560855, + "grad_norm": 1.053089747814938, + "learning_rate": 1.5018857291024356e-05, + "loss": 0.3246314525604248, + "step": 1483 + }, + { + "epoch": 0.7334733720499197, + "grad_norm": 1.033026683314433, + "learning_rate": 1.501178859418134e-05, + "loss": 0.276904433965683, + "step": 1484 + }, + { + "epoch": 0.7339676263437539, + "grad_norm": 1.1901915790154476, + "learning_rate": 1.5004716551539873e-05, + "loss": 
0.27665287256240845, + "step": 1485 + }, + { + "epoch": 0.734461880637588, + "grad_norm": 1.065690181516995, + "learning_rate": 1.4997641167821143e-05, + "loss": 0.325985848903656, + "step": 1486 + }, + { + "epoch": 0.7349561349314222, + "grad_norm": 1.2333398180696593, + "learning_rate": 1.4990562447748573e-05, + "loss": 0.2951817214488983, + "step": 1487 + }, + { + "epoch": 0.7354503892252564, + "grad_norm": 1.0415622998394476, + "learning_rate": 1.4983480396047822e-05, + "loss": 0.2592772841453552, + "step": 1488 + }, + { + "epoch": 0.7359446435190906, + "grad_norm": 1.0977128928049222, + "learning_rate": 1.4976395017446767e-05, + "loss": 0.3278253674507141, + "step": 1489 + }, + { + "epoch": 0.7364388978129247, + "grad_norm": 2.4840016288238886, + "learning_rate": 1.4969306316675497e-05, + "loss": 0.32366445660591125, + "step": 1490 + }, + { + "epoch": 0.7369331521067589, + "grad_norm": 1.065618785924185, + "learning_rate": 1.4962214298466337e-05, + "loss": 0.30544513463974, + "step": 1491 + }, + { + "epoch": 0.7374274064005931, + "grad_norm": 1.1151764286390358, + "learning_rate": 1.4955118967553812e-05, + "loss": 0.3712898790836334, + "step": 1492 + }, + { + "epoch": 0.7379216606944273, + "grad_norm": 1.072095940180716, + "learning_rate": 1.4948020328674662e-05, + "loss": 0.3006438612937927, + "step": 1493 + }, + { + "epoch": 0.7384159149882614, + "grad_norm": 1.1145573413296936, + "learning_rate": 1.494091838656784e-05, + "loss": 0.3494953215122223, + "step": 1494 + }, + { + "epoch": 0.7389101692820956, + "grad_norm": 1.091824613740768, + "learning_rate": 1.4933813145974504e-05, + "loss": 0.2698785662651062, + "step": 1495 + }, + { + "epoch": 0.7394044235759298, + "grad_norm": 1.1072713673032075, + "learning_rate": 1.4926704611638003e-05, + "loss": 0.34775635600090027, + "step": 1496 + }, + { + "epoch": 0.739898677869764, + "grad_norm": 1.1542085278706422, + "learning_rate": 1.4919592788303898e-05, + "loss": 0.328175812959671, + "step": 1497 + }, + { + 
"epoch": 0.7403929321635981, + "grad_norm": 1.1735161292651393, + "learning_rate": 1.491247768071994e-05, + "loss": 0.3320178687572479, + "step": 1498 + }, + { + "epoch": 0.7408871864574323, + "grad_norm": 1.8687355330582882, + "learning_rate": 1.4905359293636074e-05, + "loss": 0.308150053024292, + "step": 1499 + }, + { + "epoch": 0.7413814407512666, + "grad_norm": 1.1422704685641505, + "learning_rate": 1.489823763180443e-05, + "loss": 0.3311570882797241, + "step": 1500 + }, + { + "epoch": 0.7418756950451008, + "grad_norm": 1.2844910379105308, + "learning_rate": 1.4891112699979334e-05, + "loss": 0.36916327476501465, + "step": 1501 + }, + { + "epoch": 0.7423699493389349, + "grad_norm": 1.0354244070195735, + "learning_rate": 1.4883984502917286e-05, + "loss": 0.28005337715148926, + "step": 1502 + }, + { + "epoch": 0.7428642036327691, + "grad_norm": 1.2241818166146565, + "learning_rate": 1.4876853045376962e-05, + "loss": 0.3502781391143799, + "step": 1503 + }, + { + "epoch": 0.7433584579266033, + "grad_norm": 1.2448349850537428, + "learning_rate": 1.4869718332119232e-05, + "loss": 0.32032880187034607, + "step": 1504 + }, + { + "epoch": 0.7438527122204374, + "grad_norm": 1.1236679189592251, + "learning_rate": 1.4862580367907118e-05, + "loss": 0.3229472041130066, + "step": 1505 + }, + { + "epoch": 0.7443469665142716, + "grad_norm": 1.087360074547477, + "learning_rate": 1.4855439157505833e-05, + "loss": 0.2725368142127991, + "step": 1506 + }, + { + "epoch": 0.7448412208081058, + "grad_norm": 1.2509876854452482, + "learning_rate": 1.4848294705682737e-05, + "loss": 0.35358861088752747, + "step": 1507 + }, + { + "epoch": 0.74533547510194, + "grad_norm": 1.0843196708603702, + "learning_rate": 1.4841147017207376e-05, + "loss": 0.299206018447876, + "step": 1508 + }, + { + "epoch": 0.7458297293957741, + "grad_norm": 2.7618594064377384, + "learning_rate": 1.4833996096851432e-05, + "loss": 0.32004314661026, + "step": 1509 + }, + { + "epoch": 0.7463239836896083, + "grad_norm": 
1.1399779760270892, + "learning_rate": 1.4826841949388767e-05, + "loss": 0.32800590991973877, + "step": 1510 + }, + { + "epoch": 0.7468182379834425, + "grad_norm": 1.112132363505793, + "learning_rate": 1.4819684579595382e-05, + "loss": 0.2916460335254669, + "step": 1511 + }, + { + "epoch": 0.7473124922772767, + "grad_norm": 1.2041472096070427, + "learning_rate": 1.4812523992249437e-05, + "loss": 0.3276118338108063, + "step": 1512 + }, + { + "epoch": 0.7478067465711108, + "grad_norm": 1.2310079375510266, + "learning_rate": 1.4805360192131234e-05, + "loss": 0.34718069434165955, + "step": 1513 + }, + { + "epoch": 0.748301000864945, + "grad_norm": 1.0130113878676084, + "learning_rate": 1.4798193184023233e-05, + "loss": 0.2810167372226715, + "step": 1514 + }, + { + "epoch": 0.7487952551587792, + "grad_norm": 1.1600230287701154, + "learning_rate": 1.4791022972710017e-05, + "loss": 0.3542296886444092, + "step": 1515 + }, + { + "epoch": 0.7492895094526134, + "grad_norm": 1.0717623685966582, + "learning_rate": 1.4783849562978319e-05, + "loss": 0.27578431367874146, + "step": 1516 + }, + { + "epoch": 0.7497837637464475, + "grad_norm": 1.2193919844014014, + "learning_rate": 1.4776672959617006e-05, + "loss": 0.32235798239707947, + "step": 1517 + }, + { + "epoch": 0.7502780180402817, + "grad_norm": 1.073591922439447, + "learning_rate": 1.4769493167417079e-05, + "loss": 0.30588477849960327, + "step": 1518 + }, + { + "epoch": 0.7507722723341159, + "grad_norm": 1.1259837125407774, + "learning_rate": 1.4762310191171657e-05, + "loss": 0.31242361664772034, + "step": 1519 + }, + { + "epoch": 0.7512665266279501, + "grad_norm": 1.2265290610094162, + "learning_rate": 1.4755124035675995e-05, + "loss": 0.3679526150226593, + "step": 1520 + }, + { + "epoch": 0.7517607809217842, + "grad_norm": 1.0185674037419847, + "learning_rate": 1.4747934705727473e-05, + "loss": 0.28588515520095825, + "step": 1521 + }, + { + "epoch": 0.7522550352156184, + "grad_norm": 1.0624456882482982, + "learning_rate": 
1.4740742206125582e-05, + "loss": 0.29861775040626526, + "step": 1522 + }, + { + "epoch": 0.7527492895094526, + "grad_norm": 1.1245071890104912, + "learning_rate": 1.4733546541671928e-05, + "loss": 0.31373754143714905, + "step": 1523 + }, + { + "epoch": 0.7532435438032868, + "grad_norm": 1.1569601569555032, + "learning_rate": 1.472634771717024e-05, + "loss": 0.3127061128616333, + "step": 1524 + }, + { + "epoch": 0.7537377980971209, + "grad_norm": 1.0554556810771654, + "learning_rate": 1.4719145737426346e-05, + "loss": 0.33681541681289673, + "step": 1525 + }, + { + "epoch": 0.7542320523909551, + "grad_norm": 1.1202634511050926, + "learning_rate": 1.4711940607248182e-05, + "loss": 0.30266639590263367, + "step": 1526 + }, + { + "epoch": 0.7547263066847893, + "grad_norm": 1.0915134711866425, + "learning_rate": 1.47047323314458e-05, + "loss": 0.2988300323486328, + "step": 1527 + }, + { + "epoch": 0.7552205609786236, + "grad_norm": 1.1041853232471737, + "learning_rate": 1.4697520914831334e-05, + "loss": 0.32679620385169983, + "step": 1528 + }, + { + "epoch": 0.7557148152724577, + "grad_norm": 1.0049846597819565, + "learning_rate": 1.4690306362219024e-05, + "loss": 0.2935605049133301, + "step": 1529 + }, + { + "epoch": 0.7562090695662919, + "grad_norm": 1.1114952379308272, + "learning_rate": 1.4683088678425204e-05, + "loss": 0.303417831659317, + "step": 1530 + }, + { + "epoch": 0.7567033238601261, + "grad_norm": 1.0605597139601082, + "learning_rate": 1.4675867868268295e-05, + "loss": 0.30822527408599854, + "step": 1531 + }, + { + "epoch": 0.7571975781539602, + "grad_norm": 1.0772522309630048, + "learning_rate": 1.4668643936568807e-05, + "loss": 0.3104674220085144, + "step": 1532 + }, + { + "epoch": 0.7576918324477944, + "grad_norm": 1.0598818436947175, + "learning_rate": 1.4661416888149333e-05, + "loss": 0.27899307012557983, + "step": 1533 + }, + { + "epoch": 0.7581860867416286, + "grad_norm": 1.1291791785743877, + "learning_rate": 1.465418672783455e-05, + "loss": 
0.3285380005836487, + "step": 1534 + }, + { + "epoch": 0.7586803410354628, + "grad_norm": 1.0773746767557166, + "learning_rate": 1.4646953460451205e-05, + "loss": 0.32028889656066895, + "step": 1535 + }, + { + "epoch": 0.7591745953292969, + "grad_norm": 1.2647242329167074, + "learning_rate": 1.4639717090828127e-05, + "loss": 0.29870709776878357, + "step": 1536 + }, + { + "epoch": 0.7596688496231311, + "grad_norm": 1.1833149129368068, + "learning_rate": 1.4632477623796216e-05, + "loss": 0.3556699752807617, + "step": 1537 + }, + { + "epoch": 0.7601631039169653, + "grad_norm": 1.158341046754784, + "learning_rate": 1.462523506418843e-05, + "loss": 0.3433789014816284, + "step": 1538 + }, + { + "epoch": 0.7606573582107995, + "grad_norm": 1.2526530715160118, + "learning_rate": 1.4617989416839802e-05, + "loss": 0.3146114945411682, + "step": 1539 + }, + { + "epoch": 0.7611516125046336, + "grad_norm": 1.0254798742920868, + "learning_rate": 1.4610740686587424e-05, + "loss": 0.29029202461242676, + "step": 1540 + }, + { + "epoch": 0.7616458667984678, + "grad_norm": 1.2367212561484746, + "learning_rate": 1.4603488878270442e-05, + "loss": 0.2976688742637634, + "step": 1541 + }, + { + "epoch": 0.762140121092302, + "grad_norm": 1.0384933941129642, + "learning_rate": 1.459623399673006e-05, + "loss": 0.28604352474212646, + "step": 1542 + }, + { + "epoch": 0.7626343753861362, + "grad_norm": 1.2070329997652125, + "learning_rate": 1.4588976046809536e-05, + "loss": 0.34977301955223083, + "step": 1543 + }, + { + "epoch": 0.7631286296799703, + "grad_norm": 1.1108699937366455, + "learning_rate": 1.458171503335417e-05, + "loss": 0.31592974066734314, + "step": 1544 + }, + { + "epoch": 0.7636228839738045, + "grad_norm": 1.224645404968216, + "learning_rate": 1.4574450961211312e-05, + "loss": 0.31539830565452576, + "step": 1545 + }, + { + "epoch": 0.7641171382676387, + "grad_norm": 1.1914008033212045, + "learning_rate": 1.4567183835230355e-05, + "loss": 0.3100752532482147, + "step": 1546 + }, + 
{ + "epoch": 0.7646113925614729, + "grad_norm": 1.1973069016485758, + "learning_rate": 1.4559913660262726e-05, + "loss": 0.31005364656448364, + "step": 1547 + }, + { + "epoch": 0.765105646855307, + "grad_norm": 1.102020410207535, + "learning_rate": 1.4552640441161889e-05, + "loss": 0.3050577640533447, + "step": 1548 + }, + { + "epoch": 0.7655999011491412, + "grad_norm": 1.1151715417212549, + "learning_rate": 1.4545364182783343e-05, + "loss": 0.294721394777298, + "step": 1549 + }, + { + "epoch": 0.7660941554429754, + "grad_norm": 1.0907882805879732, + "learning_rate": 1.4538084889984616e-05, + "loss": 0.2974075376987457, + "step": 1550 + }, + { + "epoch": 0.7665884097368096, + "grad_norm": 1.218483256285566, + "learning_rate": 1.4530802567625259e-05, + "loss": 0.3247089385986328, + "step": 1551 + }, + { + "epoch": 0.7670826640306437, + "grad_norm": 1.1751647980540385, + "learning_rate": 1.4523517220566843e-05, + "loss": 0.3219151198863983, + "step": 1552 + }, + { + "epoch": 0.7675769183244779, + "grad_norm": 1.092743254793347, + "learning_rate": 1.4516228853672962e-05, + "loss": 0.30580246448516846, + "step": 1553 + }, + { + "epoch": 0.7680711726183121, + "grad_norm": 1.0670412493946726, + "learning_rate": 1.4508937471809233e-05, + "loss": 0.2983207702636719, + "step": 1554 + }, + { + "epoch": 0.7685654269121464, + "grad_norm": 1.1155603134808716, + "learning_rate": 1.4501643079843266e-05, + "loss": 0.3429039418697357, + "step": 1555 + }, + { + "epoch": 0.7690596812059804, + "grad_norm": 1.0600298870014666, + "learning_rate": 1.4494345682644704e-05, + "loss": 0.3055192530155182, + "step": 1556 + }, + { + "epoch": 0.7695539354998147, + "grad_norm": 1.0843598968647987, + "learning_rate": 1.4487045285085178e-05, + "loss": 0.2964102327823639, + "step": 1557 + }, + { + "epoch": 0.7700481897936489, + "grad_norm": 1.0436581793993642, + "learning_rate": 1.4479741892038335e-05, + "loss": 0.3088444471359253, + "step": 1558 + }, + { + "epoch": 0.770542444087483, + "grad_norm": 
1.070280126063037, + "learning_rate": 1.4472435508379808e-05, + "loss": 0.28697890043258667, + "step": 1559 + }, + { + "epoch": 0.7710366983813172, + "grad_norm": 1.1055317673748768, + "learning_rate": 1.4465126138987242e-05, + "loss": 0.3664681315422058, + "step": 1560 + }, + { + "epoch": 0.7715309526751514, + "grad_norm": 1.1042702127280148, + "learning_rate": 1.4457813788740263e-05, + "loss": 0.3282932937145233, + "step": 1561 + }, + { + "epoch": 0.7720252069689856, + "grad_norm": 1.4857133307558297, + "learning_rate": 1.4450498462520495e-05, + "loss": 0.27597576379776, + "step": 1562 + }, + { + "epoch": 0.7725194612628197, + "grad_norm": 1.2214452597170176, + "learning_rate": 1.4443180165211541e-05, + "loss": 0.3553946614265442, + "step": 1563 + }, + { + "epoch": 0.7730137155566539, + "grad_norm": 1.1827716129984904, + "learning_rate": 1.4435858901698995e-05, + "loss": 0.36224859952926636, + "step": 1564 + }, + { + "epoch": 0.7735079698504881, + "grad_norm": 1.1341836510498036, + "learning_rate": 1.4428534676870427e-05, + "loss": 0.2940914034843445, + "step": 1565 + }, + { + "epoch": 0.7740022241443223, + "grad_norm": 0.9563512574257287, + "learning_rate": 1.4421207495615385e-05, + "loss": 0.2717741131782532, + "step": 1566 + }, + { + "epoch": 0.7744964784381564, + "grad_norm": 1.1327871067959112, + "learning_rate": 1.441387736282539e-05, + "loss": 0.32340431213378906, + "step": 1567 + }, + { + "epoch": 0.7749907327319906, + "grad_norm": 1.1090264087970254, + "learning_rate": 1.4406544283393935e-05, + "loss": 0.3080120086669922, + "step": 1568 + }, + { + "epoch": 0.7754849870258248, + "grad_norm": 1.4441577426158039, + "learning_rate": 1.4399208262216475e-05, + "loss": 0.3118380308151245, + "step": 1569 + }, + { + "epoch": 0.775979241319659, + "grad_norm": 1.3307213271784917, + "learning_rate": 1.439186930419044e-05, + "loss": 0.3086084723472595, + "step": 1570 + }, + { + "epoch": 0.7764734956134931, + "grad_norm": 1.1593176371811458, + "learning_rate": 
1.438452741421521e-05, + "loss": 0.3233364522457123, + "step": 1571 + }, + { + "epoch": 0.7769677499073273, + "grad_norm": 1.0623874748102813, + "learning_rate": 1.4377182597192124e-05, + "loss": 0.29029640555381775, + "step": 1572 + }, + { + "epoch": 0.7774620042011615, + "grad_norm": 0.9791711244739897, + "learning_rate": 1.4369834858024476e-05, + "loss": 0.2888006567955017, + "step": 1573 + }, + { + "epoch": 0.7779562584949957, + "grad_norm": 1.1118016172702438, + "learning_rate": 1.4362484201617519e-05, + "loss": 0.3260151743888855, + "step": 1574 + }, + { + "epoch": 0.7784505127888298, + "grad_norm": 1.3306536044832058, + "learning_rate": 1.4355130632878439e-05, + "loss": 0.333207905292511, + "step": 1575 + }, + { + "epoch": 0.778944767082664, + "grad_norm": 1.0844273121477916, + "learning_rate": 1.4347774156716375e-05, + "loss": 0.2577935457229614, + "step": 1576 + }, + { + "epoch": 0.7794390213764982, + "grad_norm": 1.0777103823564191, + "learning_rate": 1.434041477804241e-05, + "loss": 0.29645979404449463, + "step": 1577 + }, + { + "epoch": 0.7799332756703324, + "grad_norm": 1.1743796307407597, + "learning_rate": 1.433305250176955e-05, + "loss": 0.2973156273365021, + "step": 1578 + }, + { + "epoch": 0.7804275299641665, + "grad_norm": 1.0277241805983874, + "learning_rate": 1.4325687332812754e-05, + "loss": 0.29159975051879883, + "step": 1579 + }, + { + "epoch": 0.7809217842580007, + "grad_norm": 1.1751334806332727, + "learning_rate": 1.4318319276088902e-05, + "loss": 0.29718664288520813, + "step": 1580 + }, + { + "epoch": 0.781416038551835, + "grad_norm": 1.316577919508971, + "learning_rate": 1.4310948336516803e-05, + "loss": 0.3262369632720947, + "step": 1581 + }, + { + "epoch": 0.781910292845669, + "grad_norm": 1.182680350644687, + "learning_rate": 1.4303574519017187e-05, + "loss": 0.36491623520851135, + "step": 1582 + }, + { + "epoch": 0.7824045471395032, + "grad_norm": 1.181580153295467, + "learning_rate": 1.4296197828512716e-05, + "loss": 
0.3558582365512848, + "step": 1583 + }, + { + "epoch": 0.7828988014333375, + "grad_norm": 0.9802630700834107, + "learning_rate": 1.428881826992796e-05, + "loss": 0.2745930552482605, + "step": 1584 + }, + { + "epoch": 0.7833930557271717, + "grad_norm": 1.1668091765691224, + "learning_rate": 1.4281435848189404e-05, + "loss": 0.3239384889602661, + "step": 1585 + }, + { + "epoch": 0.7838873100210058, + "grad_norm": 1.0164738185404556, + "learning_rate": 1.4274050568225452e-05, + "loss": 0.2708761692047119, + "step": 1586 + }, + { + "epoch": 0.78438156431484, + "grad_norm": 1.2356501028179845, + "learning_rate": 1.4266662434966412e-05, + "loss": 0.3633013963699341, + "step": 1587 + }, + { + "epoch": 0.7848758186086742, + "grad_norm": 1.2145151160613337, + "learning_rate": 1.425927145334449e-05, + "loss": 0.36411651968955994, + "step": 1588 + }, + { + "epoch": 0.7853700729025084, + "grad_norm": 1.2093753197442545, + "learning_rate": 1.4251877628293804e-05, + "loss": 0.3120966851711273, + "step": 1589 + }, + { + "epoch": 0.7858643271963425, + "grad_norm": 1.111474907013162, + "learning_rate": 1.4244480964750365e-05, + "loss": 0.32788634300231934, + "step": 1590 + }, + { + "epoch": 0.7863585814901767, + "grad_norm": 1.1320230499507122, + "learning_rate": 1.423708146765208e-05, + "loss": 0.2919159233570099, + "step": 1591 + }, + { + "epoch": 0.7868528357840109, + "grad_norm": 1.1271090926469096, + "learning_rate": 1.4229679141938749e-05, + "loss": 0.3135683834552765, + "step": 1592 + }, + { + "epoch": 0.7873470900778451, + "grad_norm": 1.2447784007425877, + "learning_rate": 1.4222273992552058e-05, + "loss": 0.351981520652771, + "step": 1593 + }, + { + "epoch": 0.7878413443716792, + "grad_norm": 1.1846979202846248, + "learning_rate": 1.4214866024435576e-05, + "loss": 0.3615785837173462, + "step": 1594 + }, + { + "epoch": 0.7883355986655134, + "grad_norm": 1.1632616021817466, + "learning_rate": 1.420745524253476e-05, + "loss": 0.29399484395980835, + "step": 1595 + }, + { + 
"epoch": 0.7888298529593476, + "grad_norm": 1.1714512606078011, + "learning_rate": 1.420004165179694e-05, + "loss": 0.30501872301101685, + "step": 1596 + }, + { + "epoch": 0.7893241072531818, + "grad_norm": 1.1172632404953093, + "learning_rate": 1.4192625257171331e-05, + "loss": 0.33745667338371277, + "step": 1597 + }, + { + "epoch": 0.7898183615470159, + "grad_norm": 0.994693525988225, + "learning_rate": 1.4185206063609e-05, + "loss": 0.2675662934780121, + "step": 1598 + }, + { + "epoch": 0.7903126158408501, + "grad_norm": 1.022107075414073, + "learning_rate": 1.41777840760629e-05, + "loss": 0.295659601688385, + "step": 1599 + }, + { + "epoch": 0.7908068701346843, + "grad_norm": 1.119079517603524, + "learning_rate": 1.4170359299487848e-05, + "loss": 0.3164275586605072, + "step": 1600 + }, + { + "epoch": 0.7913011244285185, + "grad_norm": 1.0695885495482724, + "learning_rate": 1.416293173884051e-05, + "loss": 0.3039100766181946, + "step": 1601 + }, + { + "epoch": 0.7917953787223526, + "grad_norm": 1.1080665801372258, + "learning_rate": 1.4155501399079427e-05, + "loss": 0.2994040846824646, + "step": 1602 + }, + { + "epoch": 0.7922896330161868, + "grad_norm": 1.3291271745996591, + "learning_rate": 1.4148068285164984e-05, + "loss": 0.3129369616508484, + "step": 1603 + }, + { + "epoch": 0.792783887310021, + "grad_norm": 1.084724718149673, + "learning_rate": 1.4140632402059424e-05, + "loss": 0.3223167657852173, + "step": 1604 + }, + { + "epoch": 0.7932781416038552, + "grad_norm": 1.0882285752839331, + "learning_rate": 1.4133193754726834e-05, + "loss": 0.2734811305999756, + "step": 1605 + }, + { + "epoch": 0.7937723958976893, + "grad_norm": 1.103029405529104, + "learning_rate": 1.4125752348133148e-05, + "loss": 0.27474087476730347, + "step": 1606 + }, + { + "epoch": 0.7942666501915235, + "grad_norm": 1.0487344928171054, + "learning_rate": 1.4118308187246145e-05, + "loss": 0.2619907557964325, + "step": 1607 + }, + { + "epoch": 0.7947609044853577, + "grad_norm": 
1.1981880636142406, + "learning_rate": 1.411086127703544e-05, + "loss": 0.3176937699317932, + "step": 1608 + }, + { + "epoch": 0.7952551587791918, + "grad_norm": 1.12323060393325, + "learning_rate": 1.4103411622472483e-05, + "loss": 0.28044235706329346, + "step": 1609 + }, + { + "epoch": 0.795749413073026, + "grad_norm": 1.0360499320558048, + "learning_rate": 1.409595922853056e-05, + "loss": 0.27778196334838867, + "step": 1610 + }, + { + "epoch": 0.7962436673668603, + "grad_norm": 1.2703211339383462, + "learning_rate": 1.4088504100184777e-05, + "loss": 0.3168628513813019, + "step": 1611 + }, + { + "epoch": 0.7967379216606945, + "grad_norm": 1.1557608708585085, + "learning_rate": 1.4081046242412075e-05, + "loss": 0.30454084277153015, + "step": 1612 + }, + { + "epoch": 0.7972321759545286, + "grad_norm": 1.169123128871501, + "learning_rate": 1.4073585660191214e-05, + "loss": 0.34019169211387634, + "step": 1613 + }, + { + "epoch": 0.7977264302483628, + "grad_norm": 1.173717391982327, + "learning_rate": 1.4066122358502772e-05, + "loss": 0.3044774830341339, + "step": 1614 + }, + { + "epoch": 0.798220684542197, + "grad_norm": 1.1570346377203322, + "learning_rate": 1.4058656342329136e-05, + "loss": 0.3181847333908081, + "step": 1615 + }, + { + "epoch": 0.7987149388360312, + "grad_norm": 1.249158616205248, + "learning_rate": 1.405118761665452e-05, + "loss": 0.3400845229625702, + "step": 1616 + }, + { + "epoch": 0.7992091931298653, + "grad_norm": 1.2103435711338524, + "learning_rate": 1.4043716186464935e-05, + "loss": 0.2845221161842346, + "step": 1617 + }, + { + "epoch": 0.7997034474236995, + "grad_norm": 1.060854004382088, + "learning_rate": 1.4036242056748202e-05, + "loss": 0.27315276861190796, + "step": 1618 + }, + { + "epoch": 0.8001977017175337, + "grad_norm": 1.2994888590220768, + "learning_rate": 1.4028765232493942e-05, + "loss": 0.3388780951499939, + "step": 1619 + }, + { + "epoch": 0.8006919560113679, + "grad_norm": 1.282329812705599, + "learning_rate": 
1.4021285718693581e-05, + "loss": 0.338635116815567, + "step": 1620 + }, + { + "epoch": 0.801186210305202, + "grad_norm": 1.051985157077811, + "learning_rate": 1.4013803520340328e-05, + "loss": 0.26962924003601074, + "step": 1621 + }, + { + "epoch": 0.8016804645990362, + "grad_norm": 1.119736165525956, + "learning_rate": 1.4006318642429194e-05, + "loss": 0.32106393575668335, + "step": 1622 + }, + { + "epoch": 0.8021747188928704, + "grad_norm": 1.1215264874092639, + "learning_rate": 1.399883108995698e-05, + "loss": 0.33063358068466187, + "step": 1623 + }, + { + "epoch": 0.8026689731867046, + "grad_norm": 1.2875541426354853, + "learning_rate": 1.3991340867922266e-05, + "loss": 0.31906163692474365, + "step": 1624 + }, + { + "epoch": 0.8031632274805387, + "grad_norm": 1.0397829646035845, + "learning_rate": 1.3983847981325415e-05, + "loss": 0.2601381242275238, + "step": 1625 + }, + { + "epoch": 0.8036574817743729, + "grad_norm": 1.1557585059548563, + "learning_rate": 1.3976352435168577e-05, + "loss": 0.3342537581920624, + "step": 1626 + }, + { + "epoch": 0.8041517360682071, + "grad_norm": 1.2564737583224261, + "learning_rate": 1.3968854234455669e-05, + "loss": 0.3372059166431427, + "step": 1627 + }, + { + "epoch": 0.8046459903620413, + "grad_norm": 1.1676806235835944, + "learning_rate": 1.3961353384192377e-05, + "loss": 0.31026744842529297, + "step": 1628 + }, + { + "epoch": 0.8051402446558754, + "grad_norm": 1.0921501695742, + "learning_rate": 1.3953849889386173e-05, + "loss": 0.2867652177810669, + "step": 1629 + }, + { + "epoch": 0.8056344989497096, + "grad_norm": 1.1055169200249502, + "learning_rate": 1.3946343755046274e-05, + "loss": 0.29169392585754395, + "step": 1630 + }, + { + "epoch": 0.8061287532435438, + "grad_norm": 1.0753220774925722, + "learning_rate": 1.393883498618367e-05, + "loss": 0.2976510524749756, + "step": 1631 + }, + { + "epoch": 0.806623007537378, + "grad_norm": 1.1387290098549956, + "learning_rate": 1.3931323587811107e-05, + "loss": 
0.2900371551513672, + "step": 1632 + }, + { + "epoch": 0.8071172618312121, + "grad_norm": 1.0560549112494348, + "learning_rate": 1.3923809564943093e-05, + "loss": 0.31660354137420654, + "step": 1633 + }, + { + "epoch": 0.8076115161250463, + "grad_norm": 1.079892158607702, + "learning_rate": 1.3916292922595875e-05, + "loss": 0.3099827468395233, + "step": 1634 + }, + { + "epoch": 0.8081057704188805, + "grad_norm": 1.1450154190444473, + "learning_rate": 1.3908773665787459e-05, + "loss": 0.34322571754455566, + "step": 1635 + }, + { + "epoch": 0.8086000247127146, + "grad_norm": 1.0812992610334402, + "learning_rate": 1.3901251799537592e-05, + "loss": 0.2780989408493042, + "step": 1636 + }, + { + "epoch": 0.8090942790065488, + "grad_norm": 1.1023331343203706, + "learning_rate": 1.389372732886777e-05, + "loss": 0.31049463152885437, + "step": 1637 + }, + { + "epoch": 0.809588533300383, + "grad_norm": 1.0442513462466116, + "learning_rate": 1.3886200258801213e-05, + "loss": 0.29925107955932617, + "step": 1638 + }, + { + "epoch": 0.8100827875942173, + "grad_norm": 1.1039524368767084, + "learning_rate": 1.3878670594362893e-05, + "loss": 0.31893983483314514, + "step": 1639 + }, + { + "epoch": 0.8105770418880514, + "grad_norm": 1.1529184850949745, + "learning_rate": 1.3871138340579502e-05, + "loss": 0.31307080388069153, + "step": 1640 + }, + { + "epoch": 0.8110712961818856, + "grad_norm": 1.068880489111062, + "learning_rate": 1.3863603502479465e-05, + "loss": 0.28198909759521484, + "step": 1641 + }, + { + "epoch": 0.8115655504757198, + "grad_norm": 1.085975037148026, + "learning_rate": 1.3856066085092936e-05, + "loss": 0.28937461972236633, + "step": 1642 + }, + { + "epoch": 0.812059804769554, + "grad_norm": 1.2145338549731968, + "learning_rate": 1.3848526093451789e-05, + "loss": 0.32332292199134827, + "step": 1643 + }, + { + "epoch": 0.8125540590633881, + "grad_norm": 1.1174132019487801, + "learning_rate": 1.3840983532589606e-05, + "loss": 0.3059847056865692, + "step": 1644 + }, 
+ { + "epoch": 0.8130483133572223, + "grad_norm": 1.0961695985122493, + "learning_rate": 1.3833438407541698e-05, + "loss": 0.2939583957195282, + "step": 1645 + }, + { + "epoch": 0.8135425676510565, + "grad_norm": 1.098340825845408, + "learning_rate": 1.3825890723345082e-05, + "loss": 0.3293933868408203, + "step": 1646 + }, + { + "epoch": 0.8140368219448907, + "grad_norm": 1.230371641918686, + "learning_rate": 1.3818340485038488e-05, + "loss": 0.33373600244522095, + "step": 1647 + }, + { + "epoch": 0.8145310762387248, + "grad_norm": 1.0438422749127716, + "learning_rate": 1.3810787697662337e-05, + "loss": 0.2716716527938843, + "step": 1648 + }, + { + "epoch": 0.815025330532559, + "grad_norm": 1.0698510483790238, + "learning_rate": 1.3803232366258774e-05, + "loss": 0.26109835505485535, + "step": 1649 + }, + { + "epoch": 0.8155195848263932, + "grad_norm": 1.1399890180248013, + "learning_rate": 1.3795674495871627e-05, + "loss": 0.3161536753177643, + "step": 1650 + }, + { + "epoch": 0.8160138391202274, + "grad_norm": 1.0602857079391073, + "learning_rate": 1.3788114091546414e-05, + "loss": 0.3078432083129883, + "step": 1651 + }, + { + "epoch": 0.8165080934140615, + "grad_norm": 1.2174185154702881, + "learning_rate": 1.3780551158330364e-05, + "loss": 0.31023627519607544, + "step": 1652 + }, + { + "epoch": 0.8170023477078957, + "grad_norm": 1.2616374354619766, + "learning_rate": 1.3772985701272374e-05, + "loss": 0.3438849151134491, + "step": 1653 + }, + { + "epoch": 0.8174966020017299, + "grad_norm": 1.178467386446937, + "learning_rate": 1.376541772542304e-05, + "loss": 0.31897789239883423, + "step": 1654 + }, + { + "epoch": 0.8179908562955641, + "grad_norm": 1.1125307511503921, + "learning_rate": 1.3757847235834636e-05, + "loss": 0.3101171553134918, + "step": 1655 + }, + { + "epoch": 0.8184851105893982, + "grad_norm": 1.0084998738545823, + "learning_rate": 1.375027423756111e-05, + "loss": 0.28926995396614075, + "step": 1656 + }, + { + "epoch": 0.8189793648832324, + 
"grad_norm": 1.0711136459333532, + "learning_rate": 1.3742698735658087e-05, + "loss": 0.322610080242157, + "step": 1657 + }, + { + "epoch": 0.8194736191770666, + "grad_norm": 1.272869487692088, + "learning_rate": 1.3735120735182865e-05, + "loss": 0.27430039644241333, + "step": 1658 + }, + { + "epoch": 0.8199678734709008, + "grad_norm": 1.217768709412782, + "learning_rate": 1.3727540241194408e-05, + "loss": 0.3091571629047394, + "step": 1659 + }, + { + "epoch": 0.8204621277647349, + "grad_norm": 1.1956758026057746, + "learning_rate": 1.3719957258753347e-05, + "loss": 0.3039378523826599, + "step": 1660 + }, + { + "epoch": 0.8209563820585691, + "grad_norm": 1.0829759157920493, + "learning_rate": 1.371237179292197e-05, + "loss": 0.29711851477622986, + "step": 1661 + }, + { + "epoch": 0.8214506363524033, + "grad_norm": 1.238383962420106, + "learning_rate": 1.370478384876423e-05, + "loss": 0.32411956787109375, + "step": 1662 + }, + { + "epoch": 0.8219448906462374, + "grad_norm": 1.124344248809279, + "learning_rate": 1.3697193431345725e-05, + "loss": 0.2981719672679901, + "step": 1663 + }, + { + "epoch": 0.8224391449400716, + "grad_norm": 1.1755056696925432, + "learning_rate": 1.3689600545733713e-05, + "loss": 0.32756730914115906, + "step": 1664 + }, + { + "epoch": 0.8229333992339058, + "grad_norm": 1.2289352827455349, + "learning_rate": 1.3682005196997094e-05, + "loss": 0.3910979628562927, + "step": 1665 + }, + { + "epoch": 0.82342765352774, + "grad_norm": 1.1421419999727185, + "learning_rate": 1.3674407390206417e-05, + "loss": 0.31716856360435486, + "step": 1666 + }, + { + "epoch": 0.8239219078215742, + "grad_norm": 1.087053372594853, + "learning_rate": 1.3666807130433865e-05, + "loss": 0.31816208362579346, + "step": 1667 + }, + { + "epoch": 0.8244161621154084, + "grad_norm": 1.075967107077939, + "learning_rate": 1.3659204422753265e-05, + "loss": 0.3008955121040344, + "step": 1668 + }, + { + "epoch": 0.8249104164092426, + "grad_norm": 1.0356290376868373, + 
"learning_rate": 1.3651599272240078e-05, + "loss": 0.2957409918308258, + "step": 1669 + }, + { + "epoch": 0.8254046707030768, + "grad_norm": 1.0989692049502364, + "learning_rate": 1.364399168397139e-05, + "loss": 0.33019471168518066, + "step": 1670 + }, + { + "epoch": 0.8258989249969109, + "grad_norm": 1.1429937641963879, + "learning_rate": 1.3636381663025917e-05, + "loss": 0.3532376289367676, + "step": 1671 + }, + { + "epoch": 0.8263931792907451, + "grad_norm": 1.1063068636148639, + "learning_rate": 1.362876921448401e-05, + "loss": 0.2980180084705353, + "step": 1672 + }, + { + "epoch": 0.8268874335845793, + "grad_norm": 1.0418269417044947, + "learning_rate": 1.362115434342762e-05, + "loss": 0.27932479977607727, + "step": 1673 + }, + { + "epoch": 0.8273816878784135, + "grad_norm": 1.1782768860255097, + "learning_rate": 1.3613537054940331e-05, + "loss": 0.2783966064453125, + "step": 1674 + }, + { + "epoch": 0.8278759421722476, + "grad_norm": 1.1803795842967677, + "learning_rate": 1.3605917354107336e-05, + "loss": 0.2957308888435364, + "step": 1675 + }, + { + "epoch": 0.8283701964660818, + "grad_norm": 1.180747940998609, + "learning_rate": 1.3598295246015439e-05, + "loss": 0.31640201807022095, + "step": 1676 + }, + { + "epoch": 0.828864450759916, + "grad_norm": 1.0988354367735653, + "learning_rate": 1.3590670735753047e-05, + "loss": 0.2969709634780884, + "step": 1677 + }, + { + "epoch": 0.8293587050537502, + "grad_norm": 1.1164468460017938, + "learning_rate": 1.3583043828410177e-05, + "loss": 0.34167301654815674, + "step": 1678 + }, + { + "epoch": 0.8298529593475843, + "grad_norm": 1.0956930352290435, + "learning_rate": 1.3575414529078443e-05, + "loss": 0.28540804982185364, + "step": 1679 + }, + { + "epoch": 0.8303472136414185, + "grad_norm": 1.1795719749617215, + "learning_rate": 1.3567782842851054e-05, + "loss": 0.2962091565132141, + "step": 1680 + }, + { + "epoch": 0.8308414679352527, + "grad_norm": 1.1969039130243166, + "learning_rate": 1.3560148774822816e-05, + 
"loss": 0.3650284707546234, + "step": 1681 + }, + { + "epoch": 0.8313357222290869, + "grad_norm": 1.1374534594887609, + "learning_rate": 1.3552512330090126e-05, + "loss": 0.3134267330169678, + "step": 1682 + }, + { + "epoch": 0.831829976522921, + "grad_norm": 1.0921894303145987, + "learning_rate": 1.3544873513750967e-05, + "loss": 0.3020439147949219, + "step": 1683 + }, + { + "epoch": 0.8323242308167552, + "grad_norm": 0.9393581319245673, + "learning_rate": 1.3537232330904895e-05, + "loss": 0.25083282589912415, + "step": 1684 + }, + { + "epoch": 0.8328184851105894, + "grad_norm": 1.2024147558027563, + "learning_rate": 1.3529588786653063e-05, + "loss": 0.33875352144241333, + "step": 1685 + }, + { + "epoch": 0.8333127394044236, + "grad_norm": 1.0620839528979684, + "learning_rate": 1.3521942886098186e-05, + "loss": 0.2717735171318054, + "step": 1686 + }, + { + "epoch": 0.8338069936982577, + "grad_norm": 1.1255995988400895, + "learning_rate": 1.3514294634344562e-05, + "loss": 0.271842896938324, + "step": 1687 + }, + { + "epoch": 0.8343012479920919, + "grad_norm": 1.3262220000473801, + "learning_rate": 1.3506644036498054e-05, + "loss": 0.29420506954193115, + "step": 1688 + }, + { + "epoch": 0.8347955022859261, + "grad_norm": 1.338127401529371, + "learning_rate": 1.349899109766609e-05, + "loss": 0.3336431682109833, + "step": 1689 + }, + { + "epoch": 0.8352897565797602, + "grad_norm": 1.0514224360912943, + "learning_rate": 1.3491335822957665e-05, + "loss": 0.2848295569419861, + "step": 1690 + }, + { + "epoch": 0.8357840108735944, + "grad_norm": 1.1721842125626762, + "learning_rate": 1.3483678217483327e-05, + "loss": 0.3164542019367218, + "step": 1691 + }, + { + "epoch": 0.8362782651674286, + "grad_norm": 1.1542823329984544, + "learning_rate": 1.3476018286355189e-05, + "loss": 0.3030688762664795, + "step": 1692 + }, + { + "epoch": 0.8367725194612629, + "grad_norm": 1.3329503320081877, + "learning_rate": 1.3468356034686912e-05, + "loss": 0.30218198895454407, + "step": 1693 
+ }, + { + "epoch": 0.837266773755097, + "grad_norm": 1.1429497129560076, + "learning_rate": 1.3460691467593697e-05, + "loss": 0.3327499032020569, + "step": 1694 + }, + { + "epoch": 0.8377610280489312, + "grad_norm": 1.2198627663252626, + "learning_rate": 1.3453024590192307e-05, + "loss": 0.29298892617225647, + "step": 1695 + }, + { + "epoch": 0.8382552823427654, + "grad_norm": 1.238368209416205, + "learning_rate": 1.344535540760104e-05, + "loss": 0.3096858859062195, + "step": 1696 + }, + { + "epoch": 0.8387495366365996, + "grad_norm": 1.1297510733547198, + "learning_rate": 1.3437683924939731e-05, + "loss": 0.30680233240127563, + "step": 1697 + }, + { + "epoch": 0.8392437909304337, + "grad_norm": 1.13902422944666, + "learning_rate": 1.3430010147329752e-05, + "loss": 0.3139989972114563, + "step": 1698 + }, + { + "epoch": 0.8397380452242679, + "grad_norm": 1.132396621648215, + "learning_rate": 1.3422334079894008e-05, + "loss": 0.30418652296066284, + "step": 1699 + }, + { + "epoch": 0.8402322995181021, + "grad_norm": 1.228592620621731, + "learning_rate": 1.3414655727756931e-05, + "loss": 0.31245100498199463, + "step": 1700 + }, + { + "epoch": 0.8407265538119363, + "grad_norm": 1.1908375195801162, + "learning_rate": 1.3406975096044477e-05, + "loss": 0.3381880223751068, + "step": 1701 + }, + { + "epoch": 0.8412208081057704, + "grad_norm": 1.2009611203254438, + "learning_rate": 1.3399292189884135e-05, + "loss": 0.3359968960285187, + "step": 1702 + }, + { + "epoch": 0.8417150623996046, + "grad_norm": 1.2698512981575327, + "learning_rate": 1.3391607014404891e-05, + "loss": 0.3320350646972656, + "step": 1703 + }, + { + "epoch": 0.8422093166934388, + "grad_norm": 1.1615181813433448, + "learning_rate": 1.3383919574737267e-05, + "loss": 0.32830795645713806, + "step": 1704 + }, + { + "epoch": 0.842703570987273, + "grad_norm": 0.9808785682252426, + "learning_rate": 1.3376229876013285e-05, + "loss": 0.255840927362442, + "step": 1705 + }, + { + "epoch": 0.8431978252811071, + 
"grad_norm": 1.0739012833500008, + "learning_rate": 1.3368537923366476e-05, + "loss": 0.3110755681991577, + "step": 1706 + }, + { + "epoch": 0.8436920795749413, + "grad_norm": 1.0815136095330147, + "learning_rate": 1.336084372193188e-05, + "loss": 0.28063881397247314, + "step": 1707 + }, + { + "epoch": 0.8441863338687755, + "grad_norm": 1.1539434345644544, + "learning_rate": 1.3353147276846042e-05, + "loss": 0.31297358870506287, + "step": 1708 + }, + { + "epoch": 0.8446805881626097, + "grad_norm": 1.155638509555895, + "learning_rate": 1.3345448593246986e-05, + "loss": 0.30750149488449097, + "step": 1709 + }, + { + "epoch": 0.8451748424564438, + "grad_norm": 1.0259778822912606, + "learning_rate": 1.333774767627425e-05, + "loss": 0.2665224075317383, + "step": 1710 + }, + { + "epoch": 0.845669096750278, + "grad_norm": 1.0618832452009934, + "learning_rate": 1.3330044531068858e-05, + "loss": 0.28920280933380127, + "step": 1711 + }, + { + "epoch": 0.8461633510441122, + "grad_norm": 1.0688762844449171, + "learning_rate": 1.332233916277332e-05, + "loss": 0.2678643465042114, + "step": 1712 + }, + { + "epoch": 0.8466576053379464, + "grad_norm": 1.1389370638959122, + "learning_rate": 1.3314631576531623e-05, + "loss": 0.33682242035865784, + "step": 1713 + }, + { + "epoch": 0.8471518596317805, + "grad_norm": 1.2088936099945806, + "learning_rate": 1.330692177748925e-05, + "loss": 0.36704546213150024, + "step": 1714 + }, + { + "epoch": 0.8476461139256147, + "grad_norm": 1.0972613113130176, + "learning_rate": 1.3299209770793144e-05, + "loss": 0.3183630108833313, + "step": 1715 + }, + { + "epoch": 0.8481403682194489, + "grad_norm": 1.0799352919589156, + "learning_rate": 1.3291495561591736e-05, + "loss": 0.27138596773147583, + "step": 1716 + }, + { + "epoch": 0.848634622513283, + "grad_norm": 1.1332588592044, + "learning_rate": 1.3283779155034925e-05, + "loss": 0.30252328515052795, + "step": 1717 + }, + { + "epoch": 0.8491288768071172, + "grad_norm": 1.1212549613542353, + 
"learning_rate": 1.3276060556274067e-05, + "loss": 0.29494598507881165, + "step": 1718 + }, + { + "epoch": 0.8496231311009514, + "grad_norm": 1.01148770717553, + "learning_rate": 1.3268339770461988e-05, + "loss": 0.2822422981262207, + "step": 1719 + }, + { + "epoch": 0.8501173853947857, + "grad_norm": 1.134036508201843, + "learning_rate": 1.3260616802752979e-05, + "loss": 0.3348005712032318, + "step": 1720 + }, + { + "epoch": 0.8506116396886197, + "grad_norm": 1.171053745899539, + "learning_rate": 1.3252891658302782e-05, + "loss": 0.3146229088306427, + "step": 1721 + }, + { + "epoch": 0.851105893982454, + "grad_norm": 1.1635384669674214, + "learning_rate": 1.3245164342268592e-05, + "loss": 0.34189414978027344, + "step": 1722 + }, + { + "epoch": 0.8516001482762882, + "grad_norm": 1.0403207041973201, + "learning_rate": 1.3237434859809055e-05, + "loss": 0.2967323958873749, + "step": 1723 + }, + { + "epoch": 0.8520944025701224, + "grad_norm": 1.1011411329678815, + "learning_rate": 1.3229703216084262e-05, + "loss": 0.329689085483551, + "step": 1724 + }, + { + "epoch": 0.8525886568639565, + "grad_norm": 1.1910259713127598, + "learning_rate": 1.3221969416255751e-05, + "loss": 0.33041107654571533, + "step": 1725 + }, + { + "epoch": 0.8530829111577907, + "grad_norm": 1.144468406694428, + "learning_rate": 1.321423346548649e-05, + "loss": 0.30197203159332275, + "step": 1726 + }, + { + "epoch": 0.8535771654516249, + "grad_norm": 1.1709857904248526, + "learning_rate": 1.3206495368940897e-05, + "loss": 0.29060906171798706, + "step": 1727 + }, + { + "epoch": 0.8540714197454591, + "grad_norm": 1.1769143322358042, + "learning_rate": 1.3198755131784808e-05, + "loss": 0.3119436502456665, + "step": 1728 + }, + { + "epoch": 0.8545656740392932, + "grad_norm": 1.1825299188260439, + "learning_rate": 1.31910127591855e-05, + "loss": 0.35256415605545044, + "step": 1729 + }, + { + "epoch": 0.8550599283331274, + "grad_norm": 1.169751710502227, + "learning_rate": 1.3183268256311665e-05, + 
"loss": 0.3093785345554352, + "step": 1730 + }, + { + "epoch": 0.8555541826269616, + "grad_norm": 1.0555303314758304, + "learning_rate": 1.317552162833343e-05, + "loss": 0.2713086009025574, + "step": 1731 + }, + { + "epoch": 0.8560484369207958, + "grad_norm": 1.1667835049569328, + "learning_rate": 1.3167772880422325e-05, + "loss": 0.3135699927806854, + "step": 1732 + }, + { + "epoch": 0.8565426912146299, + "grad_norm": 1.2127716623193672, + "learning_rate": 1.3160022017751308e-05, + "loss": 0.3077283501625061, + "step": 1733 + }, + { + "epoch": 0.8570369455084641, + "grad_norm": 1.0914461784602205, + "learning_rate": 1.3152269045494744e-05, + "loss": 0.2900918424129486, + "step": 1734 + }, + { + "epoch": 0.8575311998022983, + "grad_norm": 1.1010374385853228, + "learning_rate": 1.3144513968828406e-05, + "loss": 0.30828869342803955, + "step": 1735 + }, + { + "epoch": 0.8580254540961325, + "grad_norm": 1.2038482894608615, + "learning_rate": 1.3136756792929469e-05, + "loss": 0.32526400685310364, + "step": 1736 + }, + { + "epoch": 0.8585197083899666, + "grad_norm": 1.2033734524328428, + "learning_rate": 1.3128997522976518e-05, + "loss": 0.35023608803749084, + "step": 1737 + }, + { + "epoch": 0.8590139626838008, + "grad_norm": 1.0100870731750684, + "learning_rate": 1.312123616414953e-05, + "loss": 0.27287641167640686, + "step": 1738 + }, + { + "epoch": 0.859508216977635, + "grad_norm": 1.1797907328737691, + "learning_rate": 1.3113472721629871e-05, + "loss": 0.346009761095047, + "step": 1739 + }, + { + "epoch": 0.8600024712714691, + "grad_norm": 1.0724791595798373, + "learning_rate": 1.3105707200600312e-05, + "loss": 0.3297504186630249, + "step": 1740 + }, + { + "epoch": 0.8604967255653033, + "grad_norm": 1.1244989642514696, + "learning_rate": 1.3097939606245005e-05, + "loss": 0.29835087060928345, + "step": 1741 + }, + { + "epoch": 0.8609909798591375, + "grad_norm": 1.1715549927893771, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.31466037034988403, + "step": 
1742 + }, + { + "epoch": 0.8614852341529717, + "grad_norm": 1.1357283105102867, + "learning_rate": 1.3082398218300646e-05, + "loss": 0.32722294330596924, + "step": 1743 + }, + { + "epoch": 0.8619794884468058, + "grad_norm": 1.0679826147860039, + "learning_rate": 1.3074624435086809e-05, + "loss": 0.2603963613510132, + "step": 1744 + }, + { + "epoch": 0.86247374274064, + "grad_norm": 1.3274641459704042, + "learning_rate": 1.3066848599297633e-05, + "loss": 0.3100607991218567, + "step": 1745 + }, + { + "epoch": 0.8629679970344742, + "grad_norm": 1.0941880035602098, + "learning_rate": 1.3059070716124145e-05, + "loss": 0.2772334814071655, + "step": 1746 + }, + { + "epoch": 0.8634622513283084, + "grad_norm": 1.0597299652706509, + "learning_rate": 1.305129079075876e-05, + "loss": 0.3097267746925354, + "step": 1747 + }, + { + "epoch": 0.8639565056221425, + "grad_norm": 0.9828148026871187, + "learning_rate": 1.304350882839524e-05, + "loss": 0.24734097719192505, + "step": 1748 + }, + { + "epoch": 0.8644507599159768, + "grad_norm": 1.1998311484351691, + "learning_rate": 1.3035724834228713e-05, + "loss": 0.32148587703704834, + "step": 1749 + }, + { + "epoch": 0.864945014209811, + "grad_norm": 1.0741747588028856, + "learning_rate": 1.3027938813455663e-05, + "loss": 0.3037404417991638, + "step": 1750 + }, + { + "epoch": 0.8654392685036452, + "grad_norm": 1.011225470292042, + "learning_rate": 1.3020150771273925e-05, + "loss": 0.30760154128074646, + "step": 1751 + }, + { + "epoch": 0.8659335227974793, + "grad_norm": 1.2184140511087935, + "learning_rate": 1.3012360712882681e-05, + "loss": 0.3169519305229187, + "step": 1752 + }, + { + "epoch": 0.8664277770913135, + "grad_norm": 1.1637013896789081, + "learning_rate": 1.300456864348247e-05, + "loss": 0.32497861981391907, + "step": 1753 + }, + { + "epoch": 0.8669220313851477, + "grad_norm": 1.1775374726585146, + "learning_rate": 1.2996774568275163e-05, + "loss": 0.3318047821521759, + "step": 1754 + }, + { + "epoch": 0.8674162856789819, 
+ "grad_norm": 1.1616476146855954, + "learning_rate": 1.298897849246397e-05, + "loss": 0.32553863525390625, + "step": 1755 + }, + { + "epoch": 0.867910539972816, + "grad_norm": 1.2503555115463478, + "learning_rate": 1.2981180421253446e-05, + "loss": 0.36457520723342896, + "step": 1756 + }, + { + "epoch": 0.8684047942666502, + "grad_norm": 1.169094604095011, + "learning_rate": 1.2973380359849466e-05, + "loss": 0.3038361668586731, + "step": 1757 + }, + { + "epoch": 0.8688990485604844, + "grad_norm": 1.1027242700855706, + "learning_rate": 1.2965578313459246e-05, + "loss": 0.3219846785068512, + "step": 1758 + }, + { + "epoch": 0.8693933028543186, + "grad_norm": 1.1142620036406827, + "learning_rate": 1.2957774287291311e-05, + "loss": 0.3180781304836273, + "step": 1759 + }, + { + "epoch": 0.8698875571481527, + "grad_norm": 1.0882143758359024, + "learning_rate": 1.2949968286555527e-05, + "loss": 0.27302947640419006, + "step": 1760 + }, + { + "epoch": 0.8703818114419869, + "grad_norm": 1.1731384509972838, + "learning_rate": 1.2942160316463066e-05, + "loss": 0.31756314635276794, + "step": 1761 + }, + { + "epoch": 0.8708760657358211, + "grad_norm": 1.1248252560155063, + "learning_rate": 1.2934350382226412e-05, + "loss": 0.2921680510044098, + "step": 1762 + }, + { + "epoch": 0.8713703200296553, + "grad_norm": 1.2402414216219324, + "learning_rate": 1.2926538489059373e-05, + "loss": 0.36426985263824463, + "step": 1763 + }, + { + "epoch": 0.8718645743234894, + "grad_norm": 1.1314972333554063, + "learning_rate": 1.2918724642177054e-05, + "loss": 0.31873831152915955, + "step": 1764 + }, + { + "epoch": 0.8723588286173236, + "grad_norm": 1.0875738556359984, + "learning_rate": 1.2910908846795867e-05, + "loss": 0.30952733755111694, + "step": 1765 + }, + { + "epoch": 0.8728530829111578, + "grad_norm": 1.149047421683754, + "learning_rate": 1.2903091108133523e-05, + "loss": 0.33339035511016846, + "step": 1766 + }, + { + "epoch": 0.8733473372049919, + "grad_norm": 1.1272534729456736, + 
"learning_rate": 1.2895271431409038e-05, + "loss": 0.31531351804733276, + "step": 1767 + }, + { + "epoch": 0.8738415914988261, + "grad_norm": 1.0832848102714157, + "learning_rate": 1.2887449821842713e-05, + "loss": 0.3016526401042938, + "step": 1768 + }, + { + "epoch": 0.8743358457926603, + "grad_norm": 1.1203275148695215, + "learning_rate": 1.2879626284656141e-05, + "loss": 0.3364630341529846, + "step": 1769 + }, + { + "epoch": 0.8748301000864945, + "grad_norm": 1.049317791331816, + "learning_rate": 1.287180082507221e-05, + "loss": 0.29755398631095886, + "step": 1770 + }, + { + "epoch": 0.8753243543803286, + "grad_norm": 1.1616312337400467, + "learning_rate": 1.286397344831508e-05, + "loss": 0.2986103892326355, + "step": 1771 + }, + { + "epoch": 0.8758186086741628, + "grad_norm": 1.0876002245947722, + "learning_rate": 1.2856144159610197e-05, + "loss": 0.31291434168815613, + "step": 1772 + }, + { + "epoch": 0.876312862967997, + "grad_norm": 1.0995747953439883, + "learning_rate": 1.2848312964184283e-05, + "loss": 0.28285568952560425, + "step": 1773 + }, + { + "epoch": 0.8768071172618312, + "grad_norm": 1.1726581514839194, + "learning_rate": 1.2840479867265331e-05, + "loss": 0.3319891095161438, + "step": 1774 + }, + { + "epoch": 0.8773013715556653, + "grad_norm": 1.1459648615093148, + "learning_rate": 1.2832644874082604e-05, + "loss": 0.3265117406845093, + "step": 1775 + }, + { + "epoch": 0.8777956258494996, + "grad_norm": 1.1247446577558389, + "learning_rate": 1.2824807989866635e-05, + "loss": 0.32061511278152466, + "step": 1776 + }, + { + "epoch": 0.8782898801433338, + "grad_norm": 1.2889890707472673, + "learning_rate": 1.2816969219849214e-05, + "loss": 0.34278666973114014, + "step": 1777 + }, + { + "epoch": 0.878784134437168, + "grad_norm": 1.0560963427574246, + "learning_rate": 1.2809128569263387e-05, + "loss": 0.28335195779800415, + "step": 1778 + }, + { + "epoch": 0.8792783887310021, + "grad_norm": 1.261751311219404, + "learning_rate": 1.2801286043343468e-05, + 
"loss": 0.35037046670913696, + "step": 1779 + }, + { + "epoch": 0.8797726430248363, + "grad_norm": 1.2110241423671546, + "learning_rate": 1.2793441647325012e-05, + "loss": 0.30058878660202026, + "step": 1780 + }, + { + "epoch": 0.8802668973186705, + "grad_norm": 1.1234244113929972, + "learning_rate": 1.2785595386444824e-05, + "loss": 0.29526466131210327, + "step": 1781 + }, + { + "epoch": 0.8807611516125047, + "grad_norm": 1.128737037655087, + "learning_rate": 1.2777747265940956e-05, + "loss": 0.3194332718849182, + "step": 1782 + }, + { + "epoch": 0.8812554059063388, + "grad_norm": 1.1751557862784823, + "learning_rate": 1.2769897291052709e-05, + "loss": 0.33527326583862305, + "step": 1783 + }, + { + "epoch": 0.881749660200173, + "grad_norm": 1.1396387575620477, + "learning_rate": 1.2762045467020601e-05, + "loss": 0.3277815580368042, + "step": 1784 + }, + { + "epoch": 0.8822439144940072, + "grad_norm": 1.2472488401817894, + "learning_rate": 1.2754191799086406e-05, + "loss": 0.31030380725860596, + "step": 1785 + }, + { + "epoch": 0.8827381687878414, + "grad_norm": 1.2316289072611675, + "learning_rate": 1.274633629249312e-05, + "loss": 0.34496408700942993, + "step": 1786 + }, + { + "epoch": 0.8832324230816755, + "grad_norm": 1.1907817971144352, + "learning_rate": 1.2738478952484964e-05, + "loss": 0.31008201837539673, + "step": 1787 + }, + { + "epoch": 0.8837266773755097, + "grad_norm": 1.1874449425538405, + "learning_rate": 1.2730619784307388e-05, + "loss": 0.35956043004989624, + "step": 1788 + }, + { + "epoch": 0.8842209316693439, + "grad_norm": 1.075617061625236, + "learning_rate": 1.272275879320706e-05, + "loss": 0.2944573760032654, + "step": 1789 + }, + { + "epoch": 0.8847151859631781, + "grad_norm": 1.0739187183942678, + "learning_rate": 1.2714895984431863e-05, + "loss": 0.2941366136074066, + "step": 1790 + }, + { + "epoch": 0.8852094402570122, + "grad_norm": 1.3130838842625934, + "learning_rate": 1.2707031363230901e-05, + "loss": 0.34683144092559814, + "step": 
1791 + }, + { + "epoch": 0.8857036945508464, + "grad_norm": 1.1309361657268096, + "learning_rate": 1.2699164934854475e-05, + "loss": 0.3014514744281769, + "step": 1792 + }, + { + "epoch": 0.8861979488446806, + "grad_norm": 1.1642635843186193, + "learning_rate": 1.2691296704554112e-05, + "loss": 0.2749955654144287, + "step": 1793 + }, + { + "epoch": 0.8866922031385147, + "grad_norm": 1.2406193113190336, + "learning_rate": 1.2683426677582518e-05, + "loss": 0.3707960844039917, + "step": 1794 + }, + { + "epoch": 0.8871864574323489, + "grad_norm": 1.098057655891237, + "learning_rate": 1.2675554859193615e-05, + "loss": 0.3122541606426239, + "step": 1795 + }, + { + "epoch": 0.8876807117261831, + "grad_norm": 1.1564617646628, + "learning_rate": 1.2667681254642521e-05, + "loss": 0.3072753846645355, + "step": 1796 + }, + { + "epoch": 0.8881749660200173, + "grad_norm": 1.1000251936377918, + "learning_rate": 1.2659805869185534e-05, + "loss": 0.27002331614494324, + "step": 1797 + }, + { + "epoch": 0.8886692203138514, + "grad_norm": 1.0649961261949041, + "learning_rate": 1.2651928708080155e-05, + "loss": 0.2775167226791382, + "step": 1798 + }, + { + "epoch": 0.8891634746076856, + "grad_norm": 1.0134446617324497, + "learning_rate": 1.2644049776585061e-05, + "loss": 0.30023425817489624, + "step": 1799 + }, + { + "epoch": 0.8896577289015198, + "grad_norm": 1.0536326288458973, + "learning_rate": 1.2636169079960116e-05, + "loss": 0.29491451382637024, + "step": 1800 + }, + { + "epoch": 0.890151983195354, + "grad_norm": 1.1393442237009457, + "learning_rate": 1.2628286623466359e-05, + "loss": 0.3069722652435303, + "step": 1801 + }, + { + "epoch": 0.8906462374891881, + "grad_norm": 1.0432479678380786, + "learning_rate": 1.2620402412366006e-05, + "loss": 0.30594444274902344, + "step": 1802 + }, + { + "epoch": 0.8911404917830223, + "grad_norm": 1.224543789313884, + "learning_rate": 1.2612516451922442e-05, + "loss": 0.278346985578537, + "step": 1803 + }, + { + "epoch": 0.8916347460768566, + 
"grad_norm": 1.2157715105375322, + "learning_rate": 1.2604628747400227e-05, + "loss": 0.2985970973968506, + "step": 1804 + }, + { + "epoch": 0.8921290003706908, + "grad_norm": 1.1226040901686842, + "learning_rate": 1.259673930406507e-05, + "loss": 0.31054627895355225, + "step": 1805 + }, + { + "epoch": 0.8926232546645249, + "grad_norm": 1.1289150487077437, + "learning_rate": 1.258884812718386e-05, + "loss": 0.28903907537460327, + "step": 1806 + }, + { + "epoch": 0.8931175089583591, + "grad_norm": 1.132448586420589, + "learning_rate": 1.258095522202463e-05, + "loss": 0.2937915027141571, + "step": 1807 + }, + { + "epoch": 0.8936117632521933, + "grad_norm": 1.251676196860064, + "learning_rate": 1.257306059385657e-05, + "loss": 0.3038950562477112, + "step": 1808 + }, + { + "epoch": 0.8941060175460275, + "grad_norm": 1.235431629529867, + "learning_rate": 1.2565164247950023e-05, + "loss": 0.3081057071685791, + "step": 1809 + }, + { + "epoch": 0.8946002718398616, + "grad_norm": 1.1023391071403523, + "learning_rate": 1.2557266189576478e-05, + "loss": 0.2608702480792999, + "step": 1810 + }, + { + "epoch": 0.8950945261336958, + "grad_norm": 1.1733196648921136, + "learning_rate": 1.254936642400856e-05, + "loss": 0.2829548120498657, + "step": 1811 + }, + { + "epoch": 0.89558878042753, + "grad_norm": 1.1986500261226571, + "learning_rate": 1.2541464956520045e-05, + "loss": 0.3157985508441925, + "step": 1812 + }, + { + "epoch": 0.8960830347213642, + "grad_norm": 1.1805347109907394, + "learning_rate": 1.2533561792385837e-05, + "loss": 0.2948974370956421, + "step": 1813 + }, + { + "epoch": 0.8965772890151983, + "grad_norm": 1.1460952132203441, + "learning_rate": 1.252565693688198e-05, + "loss": 0.3011903166770935, + "step": 1814 + }, + { + "epoch": 0.8970715433090325, + "grad_norm": 1.3055245186221631, + "learning_rate": 1.2517750395285635e-05, + "loss": 0.3570353388786316, + "step": 1815 + }, + { + "epoch": 0.8975657976028667, + "grad_norm": 1.1337741379781219, + "learning_rate": 
1.2509842172875105e-05, + "loss": 0.30166712403297424, + "step": 1816 + }, + { + "epoch": 0.8980600518967009, + "grad_norm": 1.104423129790351, + "learning_rate": 1.2501932274929797e-05, + "loss": 0.3260636329650879, + "step": 1817 + }, + { + "epoch": 0.898554306190535, + "grad_norm": 1.0975906386988825, + "learning_rate": 1.2494020706730251e-05, + "loss": 0.31647035479545593, + "step": 1818 + }, + { + "epoch": 0.8990485604843692, + "grad_norm": 1.2099925292750648, + "learning_rate": 1.2486107473558118e-05, + "loss": 0.3059273064136505, + "step": 1819 + }, + { + "epoch": 0.8995428147782034, + "grad_norm": 1.1238527206258473, + "learning_rate": 1.247819258069616e-05, + "loss": 0.31050577759742737, + "step": 1820 + }, + { + "epoch": 0.9000370690720375, + "grad_norm": 1.167261413544568, + "learning_rate": 1.2470276033428241e-05, + "loss": 0.3199779689311981, + "step": 1821 + }, + { + "epoch": 0.9005313233658717, + "grad_norm": 1.1634621252313533, + "learning_rate": 1.2462357837039338e-05, + "loss": 0.31346091628074646, + "step": 1822 + }, + { + "epoch": 0.9010255776597059, + "grad_norm": 1.7712393639688087, + "learning_rate": 1.245443799681553e-05, + "loss": 0.31128326058387756, + "step": 1823 + }, + { + "epoch": 0.9015198319535401, + "grad_norm": 1.0665988205220116, + "learning_rate": 1.244651651804398e-05, + "loss": 0.27540329098701477, + "step": 1824 + }, + { + "epoch": 0.9020140862473742, + "grad_norm": 1.08908725997666, + "learning_rate": 1.243859340601296e-05, + "loss": 0.2613363265991211, + "step": 1825 + }, + { + "epoch": 0.9025083405412084, + "grad_norm": 1.1499718586586674, + "learning_rate": 1.2430668666011825e-05, + "loss": 0.30530184507369995, + "step": 1826 + }, + { + "epoch": 0.9030025948350426, + "grad_norm": 1.0907140946424856, + "learning_rate": 1.2422742303331022e-05, + "loss": 0.3223349153995514, + "step": 1827 + }, + { + "epoch": 0.9034968491288768, + "grad_norm": 1.131086049145241, + "learning_rate": 1.2414814323262067e-05, + "loss": 
0.32017287611961365, + "step": 1828 + }, + { + "epoch": 0.9039911034227109, + "grad_norm": 1.2183101338845472, + "learning_rate": 1.2406884731097582e-05, + "loss": 0.2965891361236572, + "step": 1829 + }, + { + "epoch": 0.9044853577165451, + "grad_norm": 1.535326476461108, + "learning_rate": 1.2398953532131235e-05, + "loss": 0.3517727851867676, + "step": 1830 + }, + { + "epoch": 0.9049796120103794, + "grad_norm": 1.0055415215772612, + "learning_rate": 1.2391020731657788e-05, + "loss": 0.26107311248779297, + "step": 1831 + }, + { + "epoch": 0.9054738663042136, + "grad_norm": 1.16405975535122, + "learning_rate": 1.2383086334973065e-05, + "loss": 0.31327998638153076, + "step": 1832 + }, + { + "epoch": 0.9059681205980477, + "grad_norm": 1.1376729658041929, + "learning_rate": 1.2375150347373956e-05, + "loss": 0.2708127498626709, + "step": 1833 + }, + { + "epoch": 0.9064623748918819, + "grad_norm": 1.2578266997569258, + "learning_rate": 1.236721277415841e-05, + "loss": 0.3264025151729584, + "step": 1834 + }, + { + "epoch": 0.9069566291857161, + "grad_norm": 1.1552886471917594, + "learning_rate": 1.2359273620625438e-05, + "loss": 0.3226723074913025, + "step": 1835 + }, + { + "epoch": 0.9074508834795503, + "grad_norm": 1.095230882373492, + "learning_rate": 1.2351332892075109e-05, + "loss": 0.2895771861076355, + "step": 1836 + }, + { + "epoch": 0.9079451377733844, + "grad_norm": 1.149733162695983, + "learning_rate": 1.234339059380854e-05, + "loss": 0.3316076397895813, + "step": 1837 + }, + { + "epoch": 0.9084393920672186, + "grad_norm": 1.10037368979265, + "learning_rate": 1.2335446731127887e-05, + "loss": 0.29858651757240295, + "step": 1838 + }, + { + "epoch": 0.9089336463610528, + "grad_norm": 1.2759313559643695, + "learning_rate": 1.2327501309336371e-05, + "loss": 0.31340792775154114, + "step": 1839 + }, + { + "epoch": 0.909427900654887, + "grad_norm": 1.038203202123546, + "learning_rate": 1.2319554333738236e-05, + "loss": 0.27344945073127747, + "step": 1840 + }, + { + 
"epoch": 0.9099221549487211, + "grad_norm": 1.1811761633875792, + "learning_rate": 1.2311605809638766e-05, + "loss": 0.27349725365638733, + "step": 1841 + }, + { + "epoch": 0.9104164092425553, + "grad_norm": 1.2931266398373575, + "learning_rate": 1.2303655742344292e-05, + "loss": 0.28933316469192505, + "step": 1842 + }, + { + "epoch": 0.9109106635363895, + "grad_norm": 1.1360201134878805, + "learning_rate": 1.2295704137162158e-05, + "loss": 0.3315466344356537, + "step": 1843 + }, + { + "epoch": 0.9114049178302237, + "grad_norm": 1.3735184410271417, + "learning_rate": 1.2287750999400743e-05, + "loss": 0.3227408528327942, + "step": 1844 + }, + { + "epoch": 0.9118991721240578, + "grad_norm": 1.1237568254849295, + "learning_rate": 1.2279796334369447e-05, + "loss": 0.30476877093315125, + "step": 1845 + }, + { + "epoch": 0.912393426417892, + "grad_norm": 1.1863082805694927, + "learning_rate": 1.2271840147378697e-05, + "loss": 0.29941046237945557, + "step": 1846 + }, + { + "epoch": 0.9128876807117262, + "grad_norm": 1.040665730868043, + "learning_rate": 1.2263882443739923e-05, + "loss": 0.26635122299194336, + "step": 1847 + }, + { + "epoch": 0.9133819350055603, + "grad_norm": 1.2009768589181191, + "learning_rate": 1.2255923228765574e-05, + "loss": 0.32384809851646423, + "step": 1848 + }, + { + "epoch": 0.9138761892993945, + "grad_norm": 1.1005403546735195, + "learning_rate": 1.2247962507769113e-05, + "loss": 0.2830178141593933, + "step": 1849 + }, + { + "epoch": 0.9143704435932287, + "grad_norm": 1.146384025635135, + "learning_rate": 1.2240000286065003e-05, + "loss": 0.32860931754112244, + "step": 1850 + }, + { + "epoch": 0.9148646978870629, + "grad_norm": 1.1448106720128721, + "learning_rate": 1.2232036568968703e-05, + "loss": 0.2820647954940796, + "step": 1851 + }, + { + "epoch": 0.915358952180897, + "grad_norm": 1.2180250787611469, + "learning_rate": 1.2224071361796685e-05, + "loss": 0.3368694484233856, + "step": 1852 + }, + { + "epoch": 0.9158532064747312, + 
"grad_norm": 1.1414152376911786, + "learning_rate": 1.2216104669866405e-05, + "loss": 0.32594096660614014, + "step": 1853 + }, + { + "epoch": 0.9163474607685654, + "grad_norm": 1.129839927585001, + "learning_rate": 1.2208136498496307e-05, + "loss": 0.3383556008338928, + "step": 1854 + }, + { + "epoch": 0.9168417150623996, + "grad_norm": 1.1516691565096748, + "learning_rate": 1.2200166853005837e-05, + "loss": 0.2655363976955414, + "step": 1855 + }, + { + "epoch": 0.9173359693562337, + "grad_norm": 1.067780593954706, + "learning_rate": 1.2192195738715414e-05, + "loss": 0.30512773990631104, + "step": 1856 + }, + { + "epoch": 0.917830223650068, + "grad_norm": 1.3304322049937938, + "learning_rate": 1.2184223160946433e-05, + "loss": 0.34026995301246643, + "step": 1857 + }, + { + "epoch": 0.9183244779439022, + "grad_norm": 1.2079696433735554, + "learning_rate": 1.2176249125021281e-05, + "loss": 0.29324328899383545, + "step": 1858 + }, + { + "epoch": 0.9188187322377364, + "grad_norm": 1.454623275441196, + "learning_rate": 1.2168273636263308e-05, + "loss": 0.3114206790924072, + "step": 1859 + }, + { + "epoch": 0.9193129865315705, + "grad_norm": 1.1301917440411622, + "learning_rate": 1.2160296699996839e-05, + "loss": 0.2829141914844513, + "step": 1860 + }, + { + "epoch": 0.9198072408254047, + "grad_norm": 1.0721269081592821, + "learning_rate": 1.2152318321547156e-05, + "loss": 0.2735600769519806, + "step": 1861 + }, + { + "epoch": 0.9203014951192389, + "grad_norm": 1.0465335380212768, + "learning_rate": 1.2144338506240519e-05, + "loss": 0.3160930573940277, + "step": 1862 + }, + { + "epoch": 0.9207957494130731, + "grad_norm": 1.0735769631967078, + "learning_rate": 1.2136357259404128e-05, + "loss": 0.26677393913269043, + "step": 1863 + }, + { + "epoch": 0.9212900037069072, + "grad_norm": 1.1305004585474958, + "learning_rate": 1.2128374586366159e-05, + "loss": 0.33033064007759094, + "step": 1864 + }, + { + "epoch": 0.9217842580007414, + "grad_norm": 1.1210908469065626, + 
"learning_rate": 1.2120390492455727e-05, + "loss": 0.28271663188934326, + "step": 1865 + }, + { + "epoch": 0.9222785122945756, + "grad_norm": 1.1196923913120616, + "learning_rate": 1.21124049830029e-05, + "loss": 0.3116013705730438, + "step": 1866 + }, + { + "epoch": 0.9227727665884098, + "grad_norm": 1.1258678919425735, + "learning_rate": 1.2104418063338686e-05, + "loss": 0.30614158511161804, + "step": 1867 + }, + { + "epoch": 0.9232670208822439, + "grad_norm": 1.2128311616527454, + "learning_rate": 1.2096429738795041e-05, + "loss": 0.34351983666419983, + "step": 1868 + }, + { + "epoch": 0.9237612751760781, + "grad_norm": 1.2814647055659063, + "learning_rate": 1.2088440014704858e-05, + "loss": 0.31006965041160583, + "step": 1869 + }, + { + "epoch": 0.9242555294699123, + "grad_norm": 1.093225958461299, + "learning_rate": 1.2080448896401964e-05, + "loss": 0.2671147584915161, + "step": 1870 + }, + { + "epoch": 0.9247497837637465, + "grad_norm": 1.2298582810409653, + "learning_rate": 1.207245638922111e-05, + "loss": 0.29123416543006897, + "step": 1871 + }, + { + "epoch": 0.9252440380575806, + "grad_norm": 1.1613532669189326, + "learning_rate": 1.2064462498497984e-05, + "loss": 0.31838539242744446, + "step": 1872 + }, + { + "epoch": 0.9257382923514148, + "grad_norm": 1.1861407153761483, + "learning_rate": 1.205646722956919e-05, + "loss": 0.3158906102180481, + "step": 1873 + }, + { + "epoch": 0.926232546645249, + "grad_norm": 1.2339017273841688, + "learning_rate": 1.2048470587772257e-05, + "loss": 0.3679552674293518, + "step": 1874 + }, + { + "epoch": 0.9267268009390831, + "grad_norm": 1.1210108605660978, + "learning_rate": 1.204047257844563e-05, + "loss": 0.2891008257865906, + "step": 1875 + }, + { + "epoch": 0.9272210552329173, + "grad_norm": 1.1110723692294957, + "learning_rate": 1.2032473206928663e-05, + "loss": 0.3207235634326935, + "step": 1876 + }, + { + "epoch": 0.9277153095267515, + "grad_norm": 1.203189154519193, + "learning_rate": 1.2024472478561624e-05, + 
"loss": 0.2710658311843872, + "step": 1877 + }, + { + "epoch": 0.9282095638205857, + "grad_norm": 1.1156076578026985, + "learning_rate": 1.2016470398685685e-05, + "loss": 0.2554836869239807, + "step": 1878 + }, + { + "epoch": 0.9287038181144198, + "grad_norm": 1.079454168196498, + "learning_rate": 1.2008466972642921e-05, + "loss": 0.2822943329811096, + "step": 1879 + }, + { + "epoch": 0.929198072408254, + "grad_norm": 1.2007950112208574, + "learning_rate": 1.20004622057763e-05, + "loss": 0.3447754681110382, + "step": 1880 + }, + { + "epoch": 0.9296923267020882, + "grad_norm": 1.1885607345269107, + "learning_rate": 1.1992456103429694e-05, + "loss": 0.3009227514266968, + "step": 1881 + }, + { + "epoch": 0.9301865809959224, + "grad_norm": 1.3491102685763696, + "learning_rate": 1.1984448670947863e-05, + "loss": 0.33154594898223877, + "step": 1882 + }, + { + "epoch": 0.9306808352897565, + "grad_norm": 1.7075348805187878, + "learning_rate": 1.1976439913676457e-05, + "loss": 0.32905343174934387, + "step": 1883 + }, + { + "epoch": 0.9311750895835907, + "grad_norm": 1.2010662669423082, + "learning_rate": 1.1968429836962e-05, + "loss": 0.34757447242736816, + "step": 1884 + }, + { + "epoch": 0.931669343877425, + "grad_norm": 1.2626693752273819, + "learning_rate": 1.1960418446151912e-05, + "loss": 0.29980987310409546, + "step": 1885 + }, + { + "epoch": 0.9321635981712592, + "grad_norm": 1.081439601568963, + "learning_rate": 1.1952405746594477e-05, + "loss": 0.3106808662414551, + "step": 1886 + }, + { + "epoch": 0.9326578524650933, + "grad_norm": 1.2465315131717423, + "learning_rate": 1.1944391743638863e-05, + "loss": 0.3222411572933197, + "step": 1887 + }, + { + "epoch": 0.9331521067589275, + "grad_norm": 1.117897007008322, + "learning_rate": 1.1936376442635104e-05, + "loss": 0.3365646302700043, + "step": 1888 + }, + { + "epoch": 0.9336463610527617, + "grad_norm": 1.2223325106102665, + "learning_rate": 1.1928359848934101e-05, + "loss": 0.32500627636909485, + "step": 1889 + }, 
+ { + "epoch": 0.9341406153465959, + "grad_norm": 1.1692844365001853, + "learning_rate": 1.1920341967887614e-05, + "loss": 0.31395500898361206, + "step": 1890 + }, + { + "epoch": 0.93463486964043, + "grad_norm": 1.084320264091655, + "learning_rate": 1.1912322804848268e-05, + "loss": 0.3060624301433563, + "step": 1891 + }, + { + "epoch": 0.9351291239342642, + "grad_norm": 1.042165685734395, + "learning_rate": 1.190430236516954e-05, + "loss": 0.2644454836845398, + "step": 1892 + }, + { + "epoch": 0.9356233782280984, + "grad_norm": 1.2086818065931575, + "learning_rate": 1.1896280654205765e-05, + "loss": 0.33404678106307983, + "step": 1893 + }, + { + "epoch": 0.9361176325219326, + "grad_norm": 1.0362894963118763, + "learning_rate": 1.1888257677312119e-05, + "loss": 0.28557512164115906, + "step": 1894 + }, + { + "epoch": 0.9366118868157667, + "grad_norm": 1.1281245501630466, + "learning_rate": 1.1880233439844623e-05, + "loss": 0.3332308530807495, + "step": 1895 + }, + { + "epoch": 0.9371061411096009, + "grad_norm": 1.0648316720915905, + "learning_rate": 1.1872207947160155e-05, + "loss": 0.3274528384208679, + "step": 1896 + }, + { + "epoch": 0.9376003954034351, + "grad_norm": 1.168900116977035, + "learning_rate": 1.1864181204616404e-05, + "loss": 0.297880083322525, + "step": 1897 + }, + { + "epoch": 0.9380946496972693, + "grad_norm": 1.1118774536365064, + "learning_rate": 1.1856153217571924e-05, + "loss": 0.3404296040534973, + "step": 1898 + }, + { + "epoch": 0.9385889039911034, + "grad_norm": 1.7308625403608067, + "learning_rate": 1.1848123991386073e-05, + "loss": 0.32343849539756775, + "step": 1899 + }, + { + "epoch": 0.9390831582849376, + "grad_norm": 1.0296882466024648, + "learning_rate": 1.1840093531419052e-05, + "loss": 0.26679158210754395, + "step": 1900 + }, + { + "epoch": 0.9395774125787718, + "grad_norm": 1.0019153721179144, + "learning_rate": 1.1832061843031884e-05, + "loss": 0.28106996417045593, + "step": 1901 + }, + { + "epoch": 0.9400716668726059, + 
"grad_norm": 1.1236721425678955, + "learning_rate": 1.1824028931586406e-05, + "loss": 0.28356847167015076, + "step": 1902 + }, + { + "epoch": 0.9405659211664401, + "grad_norm": 1.2443758247501144, + "learning_rate": 1.1815994802445274e-05, + "loss": 0.3256348669528961, + "step": 1903 + }, + { + "epoch": 0.9410601754602743, + "grad_norm": 1.1514727386744015, + "learning_rate": 1.1807959460971958e-05, + "loss": 0.2781906723976135, + "step": 1904 + }, + { + "epoch": 0.9415544297541085, + "grad_norm": 1.2599759308188183, + "learning_rate": 1.1799922912530741e-05, + "loss": 0.3129916787147522, + "step": 1905 + }, + { + "epoch": 0.9420486840479426, + "grad_norm": 1.1353254220103308, + "learning_rate": 1.1791885162486705e-05, + "loss": 0.281986266374588, + "step": 1906 + }, + { + "epoch": 0.9425429383417768, + "grad_norm": 1.2313275172087987, + "learning_rate": 1.1783846216205734e-05, + "loss": 0.33587342500686646, + "step": 1907 + }, + { + "epoch": 0.943037192635611, + "grad_norm": 1.0408682927660702, + "learning_rate": 1.1775806079054522e-05, + "loss": 0.27715635299682617, + "step": 1908 + }, + { + "epoch": 0.9435314469294452, + "grad_norm": 1.1581221243071849, + "learning_rate": 1.1767764756400541e-05, + "loss": 0.3190307915210724, + "step": 1909 + }, + { + "epoch": 0.9440257012232793, + "grad_norm": 1.1962319364965919, + "learning_rate": 1.175972225361207e-05, + "loss": 0.29336807131767273, + "step": 1910 + }, + { + "epoch": 0.9445199555171135, + "grad_norm": 1.1448708364637925, + "learning_rate": 1.1751678576058164e-05, + "loss": 0.3001596927642822, + "step": 1911 + }, + { + "epoch": 0.9450142098109477, + "grad_norm": 1.0832545536390727, + "learning_rate": 1.1743633729108672e-05, + "loss": 0.26952457427978516, + "step": 1912 + }, + { + "epoch": 0.945508464104782, + "grad_norm": 1.166519142960908, + "learning_rate": 1.1735587718134212e-05, + "loss": 0.3193609118461609, + "step": 1913 + }, + { + "epoch": 0.946002718398616, + "grad_norm": 1.2095746348772163, + 
"learning_rate": 1.172754054850619e-05, + "loss": 0.2810664176940918, + "step": 1914 + }, + { + "epoch": 0.9464969726924503, + "grad_norm": 1.1743627712454017, + "learning_rate": 1.1719492225596783e-05, + "loss": 0.28850311040878296, + "step": 1915 + }, + { + "epoch": 0.9469912269862845, + "grad_norm": 1.1739524489187587, + "learning_rate": 1.1711442754778936e-05, + "loss": 0.32268932461738586, + "step": 1916 + }, + { + "epoch": 0.9474854812801187, + "grad_norm": 1.2236575262685914, + "learning_rate": 1.1703392141426356e-05, + "loss": 0.3149149715900421, + "step": 1917 + }, + { + "epoch": 0.9479797355739528, + "grad_norm": 1.0472038436966378, + "learning_rate": 1.1695340390913526e-05, + "loss": 0.2537482678890228, + "step": 1918 + }, + { + "epoch": 0.948473989867787, + "grad_norm": 1.1232208833213926, + "learning_rate": 1.168728750861567e-05, + "loss": 0.2611936330795288, + "step": 1919 + }, + { + "epoch": 0.9489682441616212, + "grad_norm": 1.0077623948815433, + "learning_rate": 1.1679233499908781e-05, + "loss": 0.263653427362442, + "step": 1920 + }, + { + "epoch": 0.9494624984554554, + "grad_norm": 1.1707561168968341, + "learning_rate": 1.1671178370169604e-05, + "loss": 0.3122594952583313, + "step": 1921 + }, + { + "epoch": 0.9499567527492895, + "grad_norm": 1.1924449722361925, + "learning_rate": 1.1663122124775626e-05, + "loss": 0.3101043701171875, + "step": 1922 + }, + { + "epoch": 0.9504510070431237, + "grad_norm": 1.129901320884474, + "learning_rate": 1.1655064769105077e-05, + "loss": 0.295572966337204, + "step": 1923 + }, + { + "epoch": 0.9509452613369579, + "grad_norm": 1.1537509505815167, + "learning_rate": 1.1647006308536937e-05, + "loss": 0.29732125997543335, + "step": 1924 + }, + { + "epoch": 0.951439515630792, + "grad_norm": 1.1914038253365087, + "learning_rate": 1.1638946748450922e-05, + "loss": 0.32320737838745117, + "step": 1925 + }, + { + "epoch": 0.9519337699246262, + "grad_norm": 1.2581984463314084, + "learning_rate": 1.1630886094227471e-05, + 
"loss": 0.3306753933429718, + "step": 1926 + }, + { + "epoch": 0.9524280242184604, + "grad_norm": 1.0367245477692144, + "learning_rate": 1.1622824351247767e-05, + "loss": 0.2368355095386505, + "step": 1927 + }, + { + "epoch": 0.9529222785122946, + "grad_norm": 1.2216253394681036, + "learning_rate": 1.1614761524893715e-05, + "loss": 0.28470784425735474, + "step": 1928 + }, + { + "epoch": 0.9534165328061287, + "grad_norm": 1.1721810384499396, + "learning_rate": 1.160669762054794e-05, + "loss": 0.34468895196914673, + "step": 1929 + }, + { + "epoch": 0.9539107870999629, + "grad_norm": 1.1277795177992218, + "learning_rate": 1.1598632643593787e-05, + "loss": 0.30562442541122437, + "step": 1930 + }, + { + "epoch": 0.9544050413937971, + "grad_norm": 1.2141650113141733, + "learning_rate": 1.159056659941533e-05, + "loss": 0.2861478924751282, + "step": 1931 + }, + { + "epoch": 0.9548992956876313, + "grad_norm": 1.0692532214940453, + "learning_rate": 1.1582499493397332e-05, + "loss": 0.32385969161987305, + "step": 1932 + }, + { + "epoch": 0.9553935499814654, + "grad_norm": 1.173323189937386, + "learning_rate": 1.1574431330925287e-05, + "loss": 0.2935449481010437, + "step": 1933 + }, + { + "epoch": 0.9558878042752996, + "grad_norm": 1.1041433205065538, + "learning_rate": 1.156636211738538e-05, + "loss": 0.29380083084106445, + "step": 1934 + }, + { + "epoch": 0.9563820585691338, + "grad_norm": 1.1455066452691371, + "learning_rate": 1.1558291858164503e-05, + "loss": 0.2957204282283783, + "step": 1935 + }, + { + "epoch": 0.956876312862968, + "grad_norm": 1.084977751415868, + "learning_rate": 1.1550220558650246e-05, + "loss": 0.26402851939201355, + "step": 1936 + }, + { + "epoch": 0.9573705671568021, + "grad_norm": 1.1085858464768976, + "learning_rate": 1.1542148224230897e-05, + "loss": 0.29163527488708496, + "step": 1937 + }, + { + "epoch": 0.9578648214506363, + "grad_norm": 1.2120558942254267, + "learning_rate": 1.1534074860295426e-05, + "loss": 0.302470326423645, + "step": 1938 
+ }, + { + "epoch": 0.9583590757444705, + "grad_norm": 1.1861857419569999, + "learning_rate": 1.15260004722335e-05, + "loss": 0.25946593284606934, + "step": 1939 + }, + { + "epoch": 0.9588533300383048, + "grad_norm": 1.1153985574382288, + "learning_rate": 1.1517925065435457e-05, + "loss": 0.2680559456348419, + "step": 1940 + }, + { + "epoch": 0.9593475843321388, + "grad_norm": 1.2104349484077064, + "learning_rate": 1.1509848645292334e-05, + "loss": 0.2684473991394043, + "step": 1941 + }, + { + "epoch": 0.959841838625973, + "grad_norm": 1.245187124369965, + "learning_rate": 1.1501771217195827e-05, + "loss": 0.2795519232749939, + "step": 1942 + }, + { + "epoch": 0.9603360929198073, + "grad_norm": 1.2532047895072767, + "learning_rate": 1.1493692786538313e-05, + "loss": 0.35209575295448303, + "step": 1943 + }, + { + "epoch": 0.9608303472136415, + "grad_norm": 1.176019791514668, + "learning_rate": 1.1485613358712839e-05, + "loss": 0.3058928847312927, + "step": 1944 + }, + { + "epoch": 0.9613246015074756, + "grad_norm": 1.103375830615649, + "learning_rate": 1.1477532939113112e-05, + "loss": 0.2889159619808197, + "step": 1945 + }, + { + "epoch": 0.9618188558013098, + "grad_norm": 1.175759039350938, + "learning_rate": 1.1469451533133506e-05, + "loss": 0.30782538652420044, + "step": 1946 + }, + { + "epoch": 0.962313110095144, + "grad_norm": 1.1326992133409532, + "learning_rate": 1.1461369146169052e-05, + "loss": 0.3091726005077362, + "step": 1947 + }, + { + "epoch": 0.9628073643889782, + "grad_norm": 1.2061917553730328, + "learning_rate": 1.1453285783615438e-05, + "loss": 0.3287050724029541, + "step": 1948 + }, + { + "epoch": 0.9633016186828123, + "grad_norm": 1.1941959404182023, + "learning_rate": 1.1445201450868998e-05, + "loss": 0.31267625093460083, + "step": 1949 + }, + { + "epoch": 0.9637958729766465, + "grad_norm": 1.1346278168962094, + "learning_rate": 1.1437116153326719e-05, + "loss": 0.30775952339172363, + "step": 1950 + }, + { + "epoch": 0.9642901272704807, + 
"grad_norm": 1.292541938462464, + "learning_rate": 1.142902989638623e-05, + "loss": 0.3825497329235077, + "step": 1951 + }, + { + "epoch": 0.9647843815643148, + "grad_norm": 1.0454710330230295, + "learning_rate": 1.1420942685445801e-05, + "loss": 0.2866062521934509, + "step": 1952 + }, + { + "epoch": 0.965278635858149, + "grad_norm": 1.144633580750803, + "learning_rate": 1.1412854525904335e-05, + "loss": 0.27787062525749207, + "step": 1953 + }, + { + "epoch": 0.9657728901519832, + "grad_norm": 1.1290436448297894, + "learning_rate": 1.1404765423161381e-05, + "loss": 0.302572101354599, + "step": 1954 + }, + { + "epoch": 0.9662671444458174, + "grad_norm": 1.0781086639824042, + "learning_rate": 1.1396675382617097e-05, + "loss": 0.29608359932899475, + "step": 1955 + }, + { + "epoch": 0.9667613987396515, + "grad_norm": 1.1646658995895742, + "learning_rate": 1.1388584409672285e-05, + "loss": 0.28057801723480225, + "step": 1956 + }, + { + "epoch": 0.9672556530334857, + "grad_norm": 1.1188617227766138, + "learning_rate": 1.1380492509728363e-05, + "loss": 0.29628869891166687, + "step": 1957 + }, + { + "epoch": 0.9677499073273199, + "grad_norm": 1.1207660926511307, + "learning_rate": 1.1372399688187365e-05, + "loss": 0.29254984855651855, + "step": 1958 + }, + { + "epoch": 0.9682441616211541, + "grad_norm": 1.10665523309967, + "learning_rate": 1.1364305950451946e-05, + "loss": 0.32925280928611755, + "step": 1959 + }, + { + "epoch": 0.9687384159149882, + "grad_norm": 1.108029328920716, + "learning_rate": 1.1356211301925367e-05, + "loss": 0.3072258234024048, + "step": 1960 + }, + { + "epoch": 0.9692326702088224, + "grad_norm": 1.1133536367191044, + "learning_rate": 1.1348115748011499e-05, + "loss": 0.29737845063209534, + "step": 1961 + }, + { + "epoch": 0.9697269245026566, + "grad_norm": 1.1169451234105505, + "learning_rate": 1.1340019294114822e-05, + "loss": 0.27369949221611023, + "step": 1962 + }, + { + "epoch": 0.9702211787964908, + "grad_norm": 1.2861478922811351, + 
"learning_rate": 1.1331921945640408e-05, + "loss": 0.33116602897644043, + "step": 1963 + }, + { + "epoch": 0.9707154330903249, + "grad_norm": 1.9398235156973715, + "learning_rate": 1.1323823707993937e-05, + "loss": 0.2620438039302826, + "step": 1964 + }, + { + "epoch": 0.9712096873841591, + "grad_norm": 1.1505189829247824, + "learning_rate": 1.1315724586581673e-05, + "loss": 0.3187680244445801, + "step": 1965 + }, + { + "epoch": 0.9717039416779933, + "grad_norm": 1.2391813787863328, + "learning_rate": 1.1307624586810472e-05, + "loss": 0.3675233721733093, + "step": 1966 + }, + { + "epoch": 0.9721981959718276, + "grad_norm": 1.2521490817049854, + "learning_rate": 1.1299523714087784e-05, + "loss": 0.31064945459365845, + "step": 1967 + }, + { + "epoch": 0.9726924502656616, + "grad_norm": 1.1166975993354054, + "learning_rate": 1.1291421973821632e-05, + "loss": 0.2941773235797882, + "step": 1968 + }, + { + "epoch": 0.9731867045594959, + "grad_norm": 1.2565504643296834, + "learning_rate": 1.128331937142062e-05, + "loss": 0.3443846106529236, + "step": 1969 + }, + { + "epoch": 0.9736809588533301, + "grad_norm": 1.1142268279429304, + "learning_rate": 1.1275215912293933e-05, + "loss": 0.2815151810646057, + "step": 1970 + }, + { + "epoch": 0.9741752131471643, + "grad_norm": 1.1622346059327586, + "learning_rate": 1.1267111601851327e-05, + "loss": 0.2886476516723633, + "step": 1971 + }, + { + "epoch": 0.9746694674409984, + "grad_norm": 1.0942194208380682, + "learning_rate": 1.1259006445503116e-05, + "loss": 0.2692835330963135, + "step": 1972 + }, + { + "epoch": 0.9751637217348326, + "grad_norm": 1.1112683317978183, + "learning_rate": 1.1250900448660192e-05, + "loss": 0.2748587727546692, + "step": 1973 + }, + { + "epoch": 0.9756579760286668, + "grad_norm": 1.192989589829818, + "learning_rate": 1.1242793616734002e-05, + "loss": 0.2963098883628845, + "step": 1974 + }, + { + "epoch": 0.976152230322501, + "grad_norm": 1.1305326657315258, + "learning_rate": 1.1234685955136552e-05, + 
"loss": 0.28353193402290344, + "step": 1975 + }, + { + "epoch": 0.9766464846163351, + "grad_norm": 1.1967273051238179, + "learning_rate": 1.1226577469280397e-05, + "loss": 0.3308493494987488, + "step": 1976 + }, + { + "epoch": 0.9771407389101693, + "grad_norm": 1.096933031801606, + "learning_rate": 1.1218468164578653e-05, + "loss": 0.26923754811286926, + "step": 1977 + }, + { + "epoch": 0.9776349932040035, + "grad_norm": 1.5091635403311783, + "learning_rate": 1.1210358046444968e-05, + "loss": 0.2730574905872345, + "step": 1978 + }, + { + "epoch": 0.9781292474978376, + "grad_norm": 1.1338996219219686, + "learning_rate": 1.1202247120293548e-05, + "loss": 0.26464858651161194, + "step": 1979 + }, + { + "epoch": 0.9786235017916718, + "grad_norm": 1.2694994457222093, + "learning_rate": 1.1194135391539127e-05, + "loss": 0.30095499753952026, + "step": 1980 + }, + { + "epoch": 0.979117756085506, + "grad_norm": 1.3227283597348862, + "learning_rate": 1.1186022865596983e-05, + "loss": 0.3418167233467102, + "step": 1981 + }, + { + "epoch": 0.9796120103793402, + "grad_norm": 1.2780598996117225, + "learning_rate": 1.117790954788292e-05, + "loss": 0.28735262155532837, + "step": 1982 + }, + { + "epoch": 0.9801062646731743, + "grad_norm": 1.109707631385258, + "learning_rate": 1.116979544381327e-05, + "loss": 0.26816800236701965, + "step": 1983 + }, + { + "epoch": 0.9806005189670085, + "grad_norm": 1.1873089360962268, + "learning_rate": 1.1161680558804897e-05, + "loss": 0.31004661321640015, + "step": 1984 + }, + { + "epoch": 0.9810947732608427, + "grad_norm": 1.2669673078204273, + "learning_rate": 1.1153564898275184e-05, + "loss": 0.33103084564208984, + "step": 1985 + }, + { + "epoch": 0.9815890275546769, + "grad_norm": 1.3375894512262838, + "learning_rate": 1.1145448467642021e-05, + "loss": 0.3804841637611389, + "step": 1986 + }, + { + "epoch": 0.982083281848511, + "grad_norm": 1.2029739003434823, + "learning_rate": 1.1137331272323834e-05, + "loss": 0.31861352920532227, + "step": 
1987 + }, + { + "epoch": 0.9825775361423452, + "grad_norm": 1.1954996526655464, + "learning_rate": 1.1129213317739539e-05, + "loss": 0.3022298216819763, + "step": 1988 + }, + { + "epoch": 0.9830717904361794, + "grad_norm": 1.3466664334904774, + "learning_rate": 1.1121094609308564e-05, + "loss": 0.38203683495521545, + "step": 1989 + }, + { + "epoch": 0.9835660447300136, + "grad_norm": 1.215882197519198, + "learning_rate": 1.1112975152450848e-05, + "loss": 0.3105717897415161, + "step": 1990 + }, + { + "epoch": 0.9840602990238477, + "grad_norm": 1.2066484647947713, + "learning_rate": 1.1104854952586827e-05, + "loss": 0.31930285692214966, + "step": 1991 + }, + { + "epoch": 0.9845545533176819, + "grad_norm": 1.1639723195264664, + "learning_rate": 1.1096734015137422e-05, + "loss": 0.3167966902256012, + "step": 1992 + }, + { + "epoch": 0.9850488076115161, + "grad_norm": 1.168704133231974, + "learning_rate": 1.1088612345524059e-05, + "loss": 0.2693050801753998, + "step": 1993 + }, + { + "epoch": 0.9855430619053503, + "grad_norm": 1.0985586655404702, + "learning_rate": 1.1080489949168651e-05, + "loss": 0.27986466884613037, + "step": 1994 + }, + { + "epoch": 0.9860373161991844, + "grad_norm": 1.1481757517161775, + "learning_rate": 1.1072366831493589e-05, + "loss": 0.26814526319503784, + "step": 1995 + }, + { + "epoch": 0.9865315704930187, + "grad_norm": 1.146921609246337, + "learning_rate": 1.1064242997921753e-05, + "loss": 0.31393951177597046, + "step": 1996 + }, + { + "epoch": 0.9870258247868529, + "grad_norm": 1.1375630444026625, + "learning_rate": 1.1056118453876496e-05, + "loss": 0.2958461344242096, + "step": 1997 + }, + { + "epoch": 0.9875200790806871, + "grad_norm": 1.137037421352785, + "learning_rate": 1.1047993204781652e-05, + "loss": 0.29744619131088257, + "step": 1998 + }, + { + "epoch": 0.9880143333745212, + "grad_norm": 1.1508003551512254, + "learning_rate": 1.1039867256061516e-05, + "loss": 0.29055094718933105, + "step": 1999 + }, + { + "epoch": 
0.9885085876683554, + "grad_norm": 1.1632161121950038, + "learning_rate": 1.103174061314086e-05, + "loss": 0.29961663484573364, + "step": 2000 + }, + { + "epoch": 0.9890028419621896, + "grad_norm": 1.0841825843818378, + "learning_rate": 1.102361328144491e-05, + "loss": 0.34533610939979553, + "step": 2001 + }, + { + "epoch": 0.9894970962560238, + "grad_norm": 1.1849596678411713, + "learning_rate": 1.1015485266399362e-05, + "loss": 0.2994460463523865, + "step": 2002 + }, + { + "epoch": 0.9899913505498579, + "grad_norm": 1.2325420364808024, + "learning_rate": 1.1007356573430357e-05, + "loss": 0.34309566020965576, + "step": 2003 + }, + { + "epoch": 0.9904856048436921, + "grad_norm": 1.2050309252665437, + "learning_rate": 1.09992272079645e-05, + "loss": 0.3049868643283844, + "step": 2004 + }, + { + "epoch": 0.9909798591375263, + "grad_norm": 1.1759703775328856, + "learning_rate": 1.0991097175428833e-05, + "loss": 0.30586326122283936, + "step": 2005 + }, + { + "epoch": 0.9914741134313604, + "grad_norm": 1.1997965130034223, + "learning_rate": 1.0982966481250854e-05, + "loss": 0.29740482568740845, + "step": 2006 + }, + { + "epoch": 0.9919683677251946, + "grad_norm": 1.2400023524315222, + "learning_rate": 1.0974835130858497e-05, + "loss": 0.3218206465244293, + "step": 2007 + }, + { + "epoch": 0.9924626220190288, + "grad_norm": 1.1309419286206777, + "learning_rate": 1.0966703129680139e-05, + "loss": 0.2747582495212555, + "step": 2008 + }, + { + "epoch": 0.992956876312863, + "grad_norm": 1.2581670135770728, + "learning_rate": 1.0958570483144578e-05, + "loss": 0.33215245604515076, + "step": 2009 + }, + { + "epoch": 0.9934511306066971, + "grad_norm": 1.2834058413633842, + "learning_rate": 1.0950437196681061e-05, + "loss": 0.3149756193161011, + "step": 2010 + }, + { + "epoch": 0.9939453849005313, + "grad_norm": 1.1001136330607295, + "learning_rate": 1.0942303275719253e-05, + "loss": 0.2763513922691345, + "step": 2011 + }, + { + "epoch": 0.9944396391943655, + "grad_norm": 
1.0592905887432897, + "learning_rate": 1.0934168725689239e-05, + "loss": 0.2818325161933899, + "step": 2012 + }, + { + "epoch": 0.9949338934881997, + "grad_norm": 1.1079515754649163, + "learning_rate": 1.0926033552021533e-05, + "loss": 0.2659858167171478, + "step": 2013 + }, + { + "epoch": 0.9954281477820338, + "grad_norm": 1.1926210163358253, + "learning_rate": 1.091789776014706e-05, + "loss": 0.30891451239585876, + "step": 2014 + }, + { + "epoch": 0.995922402075868, + "grad_norm": 1.2194298136031743, + "learning_rate": 1.0909761355497156e-05, + "loss": 0.33645111322402954, + "step": 2015 + }, + { + "epoch": 0.9964166563697022, + "grad_norm": 1.1110546475920504, + "learning_rate": 1.0901624343503571e-05, + "loss": 0.3086194097995758, + "step": 2016 + }, + { + "epoch": 0.9969109106635364, + "grad_norm": 1.0167201052564092, + "learning_rate": 1.089348672959846e-05, + "loss": 0.2614179253578186, + "step": 2017 + }, + { + "epoch": 0.9974051649573705, + "grad_norm": 1.2224853324284848, + "learning_rate": 1.088534851921437e-05, + "loss": 0.3300556540489197, + "step": 2018 + }, + { + "epoch": 0.9978994192512047, + "grad_norm": 1.1929848499106601, + "learning_rate": 1.087720971778426e-05, + "loss": 0.28443643450737, + "step": 2019 + }, + { + "epoch": 0.9983936735450389, + "grad_norm": 1.052677422924197, + "learning_rate": 1.0869070330741475e-05, + "loss": 0.2805534601211548, + "step": 2020 + }, + { + "epoch": 0.9988879278388731, + "grad_norm": 1.065568553175956, + "learning_rate": 1.0860930363519758e-05, + "loss": 0.28186699748039246, + "step": 2021 + }, + { + "epoch": 0.9993821821327072, + "grad_norm": 1.2171160812601536, + "learning_rate": 1.0852789821553228e-05, + "loss": 0.3527688980102539, + "step": 2022 + }, + { + "epoch": 0.9998764364265414, + "grad_norm": 1.2020406854373213, + "learning_rate": 1.08446487102764e-05, + "loss": 0.30708247423171997, + "step": 2023 + }, + { + "epoch": 1.0, + "grad_norm": 2.286184440614986, + "learning_rate": 1.083650703512416e-05, + 
"loss": 0.3015655279159546, + "step": 2024 + }, + { + "epoch": 1.0004942542938342, + "grad_norm": 1.2067651750081223, + "learning_rate": 1.0828364801531777e-05, + "loss": 0.29792484641075134, + "step": 2025 + }, + { + "epoch": 1.0009885085876684, + "grad_norm": 1.1529758757862274, + "learning_rate": 1.0820222014934887e-05, + "loss": 0.27995994687080383, + "step": 2026 + }, + { + "epoch": 1.0014827628815026, + "grad_norm": 1.115022133563525, + "learning_rate": 1.0812078680769501e-05, + "loss": 0.25797444581985474, + "step": 2027 + }, + { + "epoch": 1.0019770171753366, + "grad_norm": 1.1202805963305373, + "learning_rate": 1.0803934804471991e-05, + "loss": 0.2834373116493225, + "step": 2028 + }, + { + "epoch": 1.0024712714691708, + "grad_norm": 1.147731866533824, + "learning_rate": 1.079579039147909e-05, + "loss": 0.27055832743644714, + "step": 2029 + }, + { + "epoch": 1.002965525763005, + "grad_norm": 1.1916483552600579, + "learning_rate": 1.0787645447227897e-05, + "loss": 0.30029311776161194, + "step": 2030 + }, + { + "epoch": 1.0034597800568392, + "grad_norm": 1.1834514894044206, + "learning_rate": 1.0779499977155858e-05, + "loss": 0.2741442322731018, + "step": 2031 + }, + { + "epoch": 1.0039540343506734, + "grad_norm": 1.1233171341295944, + "learning_rate": 1.0771353986700767e-05, + "loss": 0.27097994089126587, + "step": 2032 + }, + { + "epoch": 1.0044482886445076, + "grad_norm": 1.1267943347727831, + "learning_rate": 1.0763207481300781e-05, + "loss": 0.2690125107765198, + "step": 2033 + }, + { + "epoch": 1.0049425429383418, + "grad_norm": 1.1312636860673373, + "learning_rate": 1.0755060466394383e-05, + "loss": 0.29656079411506653, + "step": 2034 + }, + { + "epoch": 1.005436797232176, + "grad_norm": 1.1729529368370135, + "learning_rate": 1.0746912947420407e-05, + "loss": 0.25291675329208374, + "step": 2035 + }, + { + "epoch": 1.00593105152601, + "grad_norm": 1.410951786073956, + "learning_rate": 1.0738764929818017e-05, + "loss": 0.26391562819480896, + "step": 2036 
+ }, + { + "epoch": 1.0064253058198442, + "grad_norm": 1.258204498994485, + "learning_rate": 1.073061641902672e-05, + "loss": 0.2850308418273926, + "step": 2037 + }, + { + "epoch": 1.0069195601136784, + "grad_norm": 1.1368887973206072, + "learning_rate": 1.0722467420486338e-05, + "loss": 0.2529013454914093, + "step": 2038 + }, + { + "epoch": 1.0074138144075127, + "grad_norm": 1.2420233139292696, + "learning_rate": 1.0714317939637028e-05, + "loss": 0.2577154040336609, + "step": 2039 + }, + { + "epoch": 1.0079080687013469, + "grad_norm": 1.1996492314644527, + "learning_rate": 1.0706167981919269e-05, + "loss": 0.28677526116371155, + "step": 2040 + }, + { + "epoch": 1.008402322995181, + "grad_norm": 1.210233649974949, + "learning_rate": 1.0698017552773859e-05, + "loss": 0.25146183371543884, + "step": 2041 + }, + { + "epoch": 1.0088965772890153, + "grad_norm": 1.217205041102825, + "learning_rate": 1.0689866657641899e-05, + "loss": 0.29958251118659973, + "step": 2042 + }, + { + "epoch": 1.0093908315828495, + "grad_norm": 1.2422486891064726, + "learning_rate": 1.0681715301964817e-05, + "loss": 0.28512266278266907, + "step": 2043 + }, + { + "epoch": 1.0098850858766835, + "grad_norm": 1.3312817373132209, + "learning_rate": 1.067356349118434e-05, + "loss": 0.29768145084381104, + "step": 2044 + }, + { + "epoch": 1.0103793401705177, + "grad_norm": 1.2397312600868813, + "learning_rate": 1.0665411230742498e-05, + "loss": 0.25144103169441223, + "step": 2045 + }, + { + "epoch": 1.0108735944643519, + "grad_norm": 1.6026936131359757, + "learning_rate": 1.0657258526081629e-05, + "loss": 0.2673259973526001, + "step": 2046 + }, + { + "epoch": 1.011367848758186, + "grad_norm": 1.2940971813114743, + "learning_rate": 1.0649105382644359e-05, + "loss": 0.2845848500728607, + "step": 2047 + }, + { + "epoch": 1.0118621030520203, + "grad_norm": 1.0898574113835153, + "learning_rate": 1.0640951805873607e-05, + "loss": 0.2569392919540405, + "step": 2048 + }, + { + "epoch": 1.0123563573458545, + 
"grad_norm": 1.2632947550014098, + "learning_rate": 1.0632797801212591e-05, + "loss": 0.250387966632843, + "step": 2049 + }, + { + "epoch": 1.0128506116396887, + "grad_norm": 1.233630096360243, + "learning_rate": 1.0624643374104804e-05, + "loss": 0.28228282928466797, + "step": 2050 + }, + { + "epoch": 1.0133448659335227, + "grad_norm": 1.0888042979148498, + "learning_rate": 1.0616488529994024e-05, + "loss": 0.24724754691123962, + "step": 2051 + }, + { + "epoch": 1.013839120227357, + "grad_norm": 1.2576287774069197, + "learning_rate": 1.0608333274324312e-05, + "loss": 0.268532395362854, + "step": 2052 + }, + { + "epoch": 1.014333374521191, + "grad_norm": 1.1578525571147846, + "learning_rate": 1.0600177612539995e-05, + "loss": 0.27454662322998047, + "step": 2053 + }, + { + "epoch": 1.0148276288150253, + "grad_norm": 1.2050116136682636, + "learning_rate": 1.0592021550085683e-05, + "loss": 0.27497538924217224, + "step": 2054 + }, + { + "epoch": 1.0153218831088595, + "grad_norm": 1.1358282649300115, + "learning_rate": 1.0583865092406237e-05, + "loss": 0.24480152130126953, + "step": 2055 + }, + { + "epoch": 1.0158161374026937, + "grad_norm": 1.1352545460867702, + "learning_rate": 1.0575708244946805e-05, + "loss": 0.23754069209098816, + "step": 2056 + }, + { + "epoch": 1.016310391696528, + "grad_norm": 1.150720407382798, + "learning_rate": 1.056755101315277e-05, + "loss": 0.24541275203227997, + "step": 2057 + }, + { + "epoch": 1.0168046459903621, + "grad_norm": 1.2022551315194179, + "learning_rate": 1.055939340246979e-05, + "loss": 0.27724504470825195, + "step": 2058 + }, + { + "epoch": 1.0172989002841961, + "grad_norm": 1.2400168112160508, + "learning_rate": 1.0551235418343766e-05, + "loss": 0.2869918942451477, + "step": 2059 + }, + { + "epoch": 1.0177931545780303, + "grad_norm": 1.2299839323583324, + "learning_rate": 1.0543077066220854e-05, + "loss": 0.27153679728507996, + "step": 2060 + }, + { + "epoch": 1.0182874088718645, + "grad_norm": 1.1366017541860491, + 
"learning_rate": 1.0534918351547454e-05, + "loss": 0.2611347436904907, + "step": 2061 + }, + { + "epoch": 1.0187816631656987, + "grad_norm": 1.1317421431613228, + "learning_rate": 1.0526759279770202e-05, + "loss": 0.26649200916290283, + "step": 2062 + }, + { + "epoch": 1.019275917459533, + "grad_norm": 1.0930466767865903, + "learning_rate": 1.0518599856335983e-05, + "loss": 0.25164204835891724, + "step": 2063 + }, + { + "epoch": 1.0197701717533671, + "grad_norm": 1.2027289451385044, + "learning_rate": 1.0510440086691911e-05, + "loss": 0.288251131772995, + "step": 2064 + }, + { + "epoch": 1.0202644260472014, + "grad_norm": 1.2837951062377317, + "learning_rate": 1.0502279976285325e-05, + "loss": 0.27177444100379944, + "step": 2065 + }, + { + "epoch": 1.0207586803410356, + "grad_norm": 1.222948820556725, + "learning_rate": 1.0494119530563812e-05, + "loss": 0.2723502218723297, + "step": 2066 + }, + { + "epoch": 1.0212529346348695, + "grad_norm": 1.214398839170698, + "learning_rate": 1.0485958754975156e-05, + "loss": 0.2704971432685852, + "step": 2067 + }, + { + "epoch": 1.0217471889287038, + "grad_norm": 1.267114179641731, + "learning_rate": 1.0477797654967376e-05, + "loss": 0.30302050709724426, + "step": 2068 + }, + { + "epoch": 1.022241443222538, + "grad_norm": 1.268227752862744, + "learning_rate": 1.0469636235988711e-05, + "loss": 0.26408523321151733, + "step": 2069 + }, + { + "epoch": 1.0227356975163722, + "grad_norm": 1.2197627847133865, + "learning_rate": 1.0461474503487606e-05, + "loss": 0.2691786289215088, + "step": 2070 + }, + { + "epoch": 1.0232299518102064, + "grad_norm": 1.2792531550605064, + "learning_rate": 1.0453312462912714e-05, + "loss": 0.2823137640953064, + "step": 2071 + }, + { + "epoch": 1.0237242061040406, + "grad_norm": 1.2027503273852609, + "learning_rate": 1.04451501197129e-05, + "loss": 0.28837013244628906, + "step": 2072 + }, + { + "epoch": 1.0242184603978748, + "grad_norm": 1.27109994402604, + "learning_rate": 1.0436987479337229e-05, + 
"loss": 0.2809562683105469, + "step": 2073 + }, + { + "epoch": 1.024712714691709, + "grad_norm": 1.240431430170138, + "learning_rate": 1.0428824547234956e-05, + "loss": 0.2604525685310364, + "step": 2074 + }, + { + "epoch": 1.025206968985543, + "grad_norm": 1.1799966275921325, + "learning_rate": 1.0420661328855546e-05, + "loss": 0.24755606055259705, + "step": 2075 + }, + { + "epoch": 1.0257012232793772, + "grad_norm": 1.148092531592558, + "learning_rate": 1.0412497829648642e-05, + "loss": 0.2592730224132538, + "step": 2076 + }, + { + "epoch": 1.0261954775732114, + "grad_norm": 1.2356689091758393, + "learning_rate": 1.0404334055064083e-05, + "loss": 0.2693594694137573, + "step": 2077 + }, + { + "epoch": 1.0266897318670456, + "grad_norm": 1.2195187999450414, + "learning_rate": 1.0396170010551881e-05, + "loss": 0.2712753117084503, + "step": 2078 + }, + { + "epoch": 1.0271839861608798, + "grad_norm": 1.1741285828383992, + "learning_rate": 1.0388005701562245e-05, + "loss": 0.2693077027797699, + "step": 2079 + }, + { + "epoch": 1.027678240454714, + "grad_norm": 1.2670826968894364, + "learning_rate": 1.0379841133545544e-05, + "loss": 0.2791144847869873, + "step": 2080 + }, + { + "epoch": 1.0281724947485482, + "grad_norm": 1.163594554813514, + "learning_rate": 1.037167631195233e-05, + "loss": 0.27496254444122314, + "step": 2081 + }, + { + "epoch": 1.0286667490423822, + "grad_norm": 1.1305894692188725, + "learning_rate": 1.0363511242233322e-05, + "loss": 0.26037347316741943, + "step": 2082 + }, + { + "epoch": 1.0291610033362164, + "grad_norm": 1.2085934995349474, + "learning_rate": 1.0355345929839402e-05, + "loss": 0.2610514760017395, + "step": 2083 + }, + { + "epoch": 1.0296552576300506, + "grad_norm": 1.1531883738354434, + "learning_rate": 1.0347180380221618e-05, + "loss": 0.24750857055187225, + "step": 2084 + }, + { + "epoch": 1.0301495119238848, + "grad_norm": 1.2017075670935908, + "learning_rate": 1.0339014598831169e-05, + "loss": 0.2835415303707123, + "step": 2085 + 
}, + { + "epoch": 1.030643766217719, + "grad_norm": 1.2153811049556569, + "learning_rate": 1.033084859111942e-05, + "loss": 0.25762057304382324, + "step": 2086 + }, + { + "epoch": 1.0311380205115532, + "grad_norm": 1.3245241554987517, + "learning_rate": 1.032268236253788e-05, + "loss": 0.2818237841129303, + "step": 2087 + }, + { + "epoch": 1.0316322748053874, + "grad_norm": 1.2402911628462394, + "learning_rate": 1.0314515918538202e-05, + "loss": 0.27192944288253784, + "step": 2088 + }, + { + "epoch": 1.0321265290992216, + "grad_norm": 1.1715597954552734, + "learning_rate": 1.0306349264572195e-05, + "loss": 0.3002319931983948, + "step": 2089 + }, + { + "epoch": 1.0326207833930556, + "grad_norm": 1.221598051409306, + "learning_rate": 1.0298182406091794e-05, + "loss": 0.27106401324272156, + "step": 2090 + }, + { + "epoch": 1.0331150376868898, + "grad_norm": 1.2123644146814079, + "learning_rate": 1.0290015348549076e-05, + "loss": 0.2740558385848999, + "step": 2091 + }, + { + "epoch": 1.033609291980724, + "grad_norm": 1.2394453454529126, + "learning_rate": 1.0281848097396261e-05, + "loss": 0.2970008850097656, + "step": 2092 + }, + { + "epoch": 1.0341035462745582, + "grad_norm": 1.2003549808286662, + "learning_rate": 1.027368065808568e-05, + "loss": 0.27684125304222107, + "step": 2093 + }, + { + "epoch": 1.0345978005683925, + "grad_norm": 1.1371538472805924, + "learning_rate": 1.0265513036069803e-05, + "loss": 0.2732700705528259, + "step": 2094 + }, + { + "epoch": 1.0350920548622267, + "grad_norm": 1.1448190493490698, + "learning_rate": 1.0257345236801215e-05, + "loss": 0.25189805030822754, + "step": 2095 + }, + { + "epoch": 1.0355863091560609, + "grad_norm": 1.1221327830153236, + "learning_rate": 1.0249177265732629e-05, + "loss": 0.3177054524421692, + "step": 2096 + }, + { + "epoch": 1.036080563449895, + "grad_norm": 1.0492479192600686, + "learning_rate": 1.0241009128316854e-05, + "loss": 0.23350921273231506, + "step": 2097 + }, + { + "epoch": 1.036574817743729, + 
"grad_norm": 1.2565303796372052, + "learning_rate": 1.0232840830006832e-05, + "loss": 0.3011140525341034, + "step": 2098 + }, + { + "epoch": 1.0370690720375633, + "grad_norm": 1.164329016307231, + "learning_rate": 1.0224672376255598e-05, + "loss": 0.2578561305999756, + "step": 2099 + }, + { + "epoch": 1.0375633263313975, + "grad_norm": 1.1701632763887444, + "learning_rate": 1.0216503772516297e-05, + "loss": 0.2622804045677185, + "step": 2100 + }, + { + "epoch": 1.0380575806252317, + "grad_norm": 1.219987069304434, + "learning_rate": 1.0208335024242169e-05, + "loss": 0.2662869691848755, + "step": 2101 + }, + { + "epoch": 1.0385518349190659, + "grad_norm": 1.2303351498865798, + "learning_rate": 1.0200166136886558e-05, + "loss": 0.27084922790527344, + "step": 2102 + }, + { + "epoch": 1.0390460892129, + "grad_norm": 1.2434849653646893, + "learning_rate": 1.0191997115902891e-05, + "loss": 0.26290780305862427, + "step": 2103 + }, + { + "epoch": 1.0395403435067343, + "grad_norm": 1.192171896111284, + "learning_rate": 1.0183827966744694e-05, + "loss": 0.27367106080055237, + "step": 2104 + }, + { + "epoch": 1.0400345978005685, + "grad_norm": 1.2706879657010888, + "learning_rate": 1.0175658694865574e-05, + "loss": 0.28507113456726074, + "step": 2105 + }, + { + "epoch": 1.0405288520944025, + "grad_norm": 1.2299041683114893, + "learning_rate": 1.0167489305719221e-05, + "loss": 0.2533179521560669, + "step": 2106 + }, + { + "epoch": 1.0410231063882367, + "grad_norm": 1.2546449586851505, + "learning_rate": 1.0159319804759398e-05, + "loss": 0.28755924105644226, + "step": 2107 + }, + { + "epoch": 1.041517360682071, + "grad_norm": 1.1726176332749902, + "learning_rate": 1.015115019743995e-05, + "loss": 0.26722773909568787, + "step": 2108 + }, + { + "epoch": 1.042011614975905, + "grad_norm": 1.3986075029095133, + "learning_rate": 1.0142980489214788e-05, + "loss": 0.3122308850288391, + "step": 2109 + }, + { + "epoch": 1.0425058692697393, + "grad_norm": 1.1273960807987882, + 
"learning_rate": 1.0134810685537899e-05, + "loss": 0.22603261470794678, + "step": 2110 + }, + { + "epoch": 1.0430001235635735, + "grad_norm": 1.1517998097919544, + "learning_rate": 1.0126640791863316e-05, + "loss": 0.2823299169540405, + "step": 2111 + }, + { + "epoch": 1.0434943778574077, + "grad_norm": 1.3191906526904469, + "learning_rate": 1.0118470813645156e-05, + "loss": 0.30999040603637695, + "step": 2112 + }, + { + "epoch": 1.0439886321512417, + "grad_norm": 1.1820148857556874, + "learning_rate": 1.0110300756337569e-05, + "loss": 0.266022264957428, + "step": 2113 + }, + { + "epoch": 1.044482886445076, + "grad_norm": 1.6608098375974347, + "learning_rate": 1.0102130625394776e-05, + "loss": 0.2674095034599304, + "step": 2114 + }, + { + "epoch": 1.0449771407389101, + "grad_norm": 1.2172826939531747, + "learning_rate": 1.0093960426271037e-05, + "loss": 0.30045652389526367, + "step": 2115 + }, + { + "epoch": 1.0454713950327443, + "grad_norm": 1.1782919874699391, + "learning_rate": 1.0085790164420659e-05, + "loss": 0.28455668687820435, + "step": 2116 + }, + { + "epoch": 1.0459656493265785, + "grad_norm": 1.1749948852757104, + "learning_rate": 1.0077619845297992e-05, + "loss": 0.2429066300392151, + "step": 2117 + }, + { + "epoch": 1.0464599036204127, + "grad_norm": 1.1453766958637177, + "learning_rate": 1.0069449474357427e-05, + "loss": 0.2515121102333069, + "step": 2118 + }, + { + "epoch": 1.046954157914247, + "grad_norm": 1.234414346344525, + "learning_rate": 1.0061279057053385e-05, + "loss": 0.30011802911758423, + "step": 2119 + }, + { + "epoch": 1.0474484122080812, + "grad_norm": 1.1997300836338318, + "learning_rate": 1.005310859884032e-05, + "loss": 0.2577645480632782, + "step": 2120 + }, + { + "epoch": 1.0479426665019151, + "grad_norm": 1.0391250618888572, + "learning_rate": 1.0044938105172713e-05, + "loss": 0.21476465463638306, + "step": 2121 + }, + { + "epoch": 1.0484369207957493, + "grad_norm": 1.3902782329860977, + "learning_rate": 1.0036767581505067e-05, + 
"loss": 0.2587023079395294, + "step": 2122 + }, + { + "epoch": 1.0489311750895836, + "grad_norm": 1.1311469001510768, + "learning_rate": 1.0028597033291911e-05, + "loss": 0.2537185251712799, + "step": 2123 + }, + { + "epoch": 1.0494254293834178, + "grad_norm": 1.0410406857423857, + "learning_rate": 1.0020426465987782e-05, + "loss": 0.24486014246940613, + "step": 2124 + }, + { + "epoch": 1.049919683677252, + "grad_norm": 1.4376390907817962, + "learning_rate": 1.0012255885047241e-05, + "loss": 0.2728436589241028, + "step": 2125 + }, + { + "epoch": 1.0504139379710862, + "grad_norm": 1.3186765660198476, + "learning_rate": 1.0004085295924843e-05, + "loss": 0.30238842964172363, + "step": 2126 + }, + { + "epoch": 1.0509081922649204, + "grad_norm": 1.2910923396564535, + "learning_rate": 9.99591470407516e-06, + "loss": 0.30347609519958496, + "step": 2127 + }, + { + "epoch": 1.0514024465587544, + "grad_norm": 1.2188667375190219, + "learning_rate": 9.987744114952764e-06, + "loss": 0.2581411302089691, + "step": 2128 + }, + { + "epoch": 1.0518967008525886, + "grad_norm": 1.2560629408792487, + "learning_rate": 9.979573534012218e-06, + "loss": 0.239881694316864, + "step": 2129 + }, + { + "epoch": 1.0523909551464228, + "grad_norm": 1.2977893982324902, + "learning_rate": 9.971402966708092e-06, + "loss": 0.3058615028858185, + "step": 2130 + }, + { + "epoch": 1.052885209440257, + "grad_norm": 1.2842102843103194, + "learning_rate": 9.963232418494936e-06, + "loss": 0.25285837054252625, + "step": 2131 + }, + { + "epoch": 1.0533794637340912, + "grad_norm": 1.2217652802535364, + "learning_rate": 9.955061894827294e-06, + "loss": 0.27366510033607483, + "step": 2132 + }, + { + "epoch": 1.0538737180279254, + "grad_norm": 1.1489983530266883, + "learning_rate": 9.946891401159683e-06, + "loss": 0.22268086671829224, + "step": 2133 + }, + { + "epoch": 1.0543679723217596, + "grad_norm": 1.1461059074650484, + "learning_rate": 9.938720942946616e-06, + "loss": 0.2540682554244995, + "step": 2134 + }, + 
{ + "epoch": 1.0548622266155938, + "grad_norm": 1.2357731632052622, + "learning_rate": 9.930550525642576e-06, + "loss": 0.262179970741272, + "step": 2135 + }, + { + "epoch": 1.0553564809094278, + "grad_norm": 1.2267299487839205, + "learning_rate": 9.92238015470201e-06, + "loss": 0.25471946597099304, + "step": 2136 + }, + { + "epoch": 1.055850735203262, + "grad_norm": 1.162352058446371, + "learning_rate": 9.914209835579344e-06, + "loss": 0.2580556571483612, + "step": 2137 + }, + { + "epoch": 1.0563449894970962, + "grad_norm": 1.261401071852413, + "learning_rate": 9.906039573728964e-06, + "loss": 0.29909616708755493, + "step": 2138 + }, + { + "epoch": 1.0568392437909304, + "grad_norm": 1.2162562018595562, + "learning_rate": 9.897869374605226e-06, + "loss": 0.2828724980354309, + "step": 2139 + }, + { + "epoch": 1.0573334980847646, + "grad_norm": 1.2076714268656592, + "learning_rate": 9.889699243662433e-06, + "loss": 0.26731711626052856, + "step": 2140 + }, + { + "epoch": 1.0578277523785988, + "grad_norm": 1.2666827338430986, + "learning_rate": 9.88152918635485e-06, + "loss": 0.2912555932998657, + "step": 2141 + }, + { + "epoch": 1.058322006672433, + "grad_norm": 1.1593053736993435, + "learning_rate": 9.873359208136685e-06, + "loss": 0.2335313856601715, + "step": 2142 + }, + { + "epoch": 1.0588162609662672, + "grad_norm": 1.2934128795704303, + "learning_rate": 9.865189314462105e-06, + "loss": 0.2716987729072571, + "step": 2143 + }, + { + "epoch": 1.0593105152601012, + "grad_norm": 1.3251488161911162, + "learning_rate": 9.857019510785215e-06, + "loss": 0.2919968068599701, + "step": 2144 + }, + { + "epoch": 1.0598047695539354, + "grad_norm": 1.197230535187453, + "learning_rate": 9.848849802560057e-06, + "loss": 0.26279503107070923, + "step": 2145 + }, + { + "epoch": 1.0602990238477696, + "grad_norm": 1.263871154668556, + "learning_rate": 9.840680195240606e-06, + "loss": 0.31622597575187683, + "step": 2146 + }, + { + "epoch": 1.0607932781416038, + "grad_norm": 
1.270948260835911, + "learning_rate": 9.832510694280782e-06, + "loss": 0.2399556040763855, + "step": 2147 + }, + { + "epoch": 1.061287532435438, + "grad_norm": 1.2181574543701559, + "learning_rate": 9.824341305134428e-06, + "loss": 0.2650333046913147, + "step": 2148 + }, + { + "epoch": 1.0617817867292723, + "grad_norm": 1.274348887888969, + "learning_rate": 9.816172033255307e-06, + "loss": 0.26629161834716797, + "step": 2149 + }, + { + "epoch": 1.0622760410231065, + "grad_norm": 1.2611051957138737, + "learning_rate": 9.808002884097109e-06, + "loss": 0.28042545914649963, + "step": 2150 + }, + { + "epoch": 1.0627702953169407, + "grad_norm": 1.1495131020915084, + "learning_rate": 9.799833863113445e-06, + "loss": 0.24374082684516907, + "step": 2151 + }, + { + "epoch": 1.0632645496107747, + "grad_norm": 1.1048551979398207, + "learning_rate": 9.791664975757835e-06, + "loss": 0.23013898730278015, + "step": 2152 + }, + { + "epoch": 1.0637588039046089, + "grad_norm": 1.4072884886903234, + "learning_rate": 9.783496227483706e-06, + "loss": 0.25313276052474976, + "step": 2153 + }, + { + "epoch": 1.064253058198443, + "grad_norm": 1.248155174046862, + "learning_rate": 9.775327623744403e-06, + "loss": 0.2642362713813782, + "step": 2154 + }, + { + "epoch": 1.0647473124922773, + "grad_norm": 1.1405325090848468, + "learning_rate": 9.76715916999317e-06, + "loss": 0.2417108118534088, + "step": 2155 + }, + { + "epoch": 1.0652415667861115, + "grad_norm": 1.2556215450887547, + "learning_rate": 9.758990871683148e-06, + "loss": 0.25653502345085144, + "step": 2156 + }, + { + "epoch": 1.0657358210799457, + "grad_norm": 1.22877547041534, + "learning_rate": 9.750822734267378e-06, + "loss": 0.247604638338089, + "step": 2157 + }, + { + "epoch": 1.06623007537378, + "grad_norm": 1.2330600407976389, + "learning_rate": 9.742654763198786e-06, + "loss": 0.2675636112689972, + "step": 2158 + }, + { + "epoch": 1.0667243296676139, + "grad_norm": 1.230290211943024, + "learning_rate": 9.7344869639302e-06, + 
"loss": 0.2570686340332031, + "step": 2159 + }, + { + "epoch": 1.067218583961448, + "grad_norm": 1.4290278531414855, + "learning_rate": 9.726319341914323e-06, + "loss": 0.3046165704727173, + "step": 2160 + }, + { + "epoch": 1.0677128382552823, + "grad_norm": 1.3759048148010737, + "learning_rate": 9.718151902603744e-06, + "loss": 0.24278515577316284, + "step": 2161 + }, + { + "epoch": 1.0682070925491165, + "grad_norm": 1.235098490769484, + "learning_rate": 9.709984651450924e-06, + "loss": 0.2565615773200989, + "step": 2162 + }, + { + "epoch": 1.0687013468429507, + "grad_norm": 1.3303607886608886, + "learning_rate": 9.701817593908209e-06, + "loss": 0.2672972083091736, + "step": 2163 + }, + { + "epoch": 1.069195601136785, + "grad_norm": 1.1620974642583077, + "learning_rate": 9.693650735427808e-06, + "loss": 0.21376445889472961, + "step": 2164 + }, + { + "epoch": 1.0696898554306191, + "grad_norm": 1.2628274098639385, + "learning_rate": 9.685484081461802e-06, + "loss": 0.27743393182754517, + "step": 2165 + }, + { + "epoch": 1.0701841097244533, + "grad_norm": 1.3615817033316626, + "learning_rate": 9.677317637462125e-06, + "loss": 0.2747134566307068, + "step": 2166 + }, + { + "epoch": 1.0706783640182873, + "grad_norm": 1.1533673233774355, + "learning_rate": 9.669151408880581e-06, + "loss": 0.2775312066078186, + "step": 2167 + }, + { + "epoch": 1.0711726183121215, + "grad_norm": 1.392383813550365, + "learning_rate": 9.660985401168833e-06, + "loss": 0.2743167281150818, + "step": 2168 + }, + { + "epoch": 1.0716668726059557, + "grad_norm": 1.1731022030570613, + "learning_rate": 9.652819619778387e-06, + "loss": 0.26030686497688293, + "step": 2169 + }, + { + "epoch": 1.07216112689979, + "grad_norm": 1.2886350622041207, + "learning_rate": 9.644654070160603e-06, + "loss": 0.32307812571525574, + "step": 2170 + }, + { + "epoch": 1.0726553811936241, + "grad_norm": 1.309807945595821, + "learning_rate": 9.63648875776668e-06, + "loss": 0.2773011028766632, + "step": 2171 + }, + { + 
"epoch": 1.0731496354874583, + "grad_norm": 1.3767412291020849, + "learning_rate": 9.628323688047672e-06, + "loss": 0.27996528148651123, + "step": 2172 + }, + { + "epoch": 1.0736438897812925, + "grad_norm": 1.176261909375135, + "learning_rate": 9.620158866454459e-06, + "loss": 0.28022176027297974, + "step": 2173 + }, + { + "epoch": 1.0741381440751268, + "grad_norm": 1.1746327357052728, + "learning_rate": 9.61199429843776e-06, + "loss": 0.2688876986503601, + "step": 2174 + }, + { + "epoch": 1.0746323983689607, + "grad_norm": 1.1454924799354713, + "learning_rate": 9.60382998944812e-06, + "loss": 0.23915211856365204, + "step": 2175 + }, + { + "epoch": 1.075126652662795, + "grad_norm": 1.1770664027196904, + "learning_rate": 9.59566594493592e-06, + "loss": 0.2533806264400482, + "step": 2176 + }, + { + "epoch": 1.0756209069566292, + "grad_norm": 1.2321355277799408, + "learning_rate": 9.587502170351361e-06, + "loss": 0.2887522876262665, + "step": 2177 + }, + { + "epoch": 1.0761151612504634, + "grad_norm": 1.2169372388289537, + "learning_rate": 9.579338671144459e-06, + "loss": 0.2885408401489258, + "step": 2178 + }, + { + "epoch": 1.0766094155442976, + "grad_norm": 1.2209492195717289, + "learning_rate": 9.571175452765045e-06, + "loss": 0.25656914710998535, + "step": 2179 + }, + { + "epoch": 1.0771036698381318, + "grad_norm": 1.2669016448608037, + "learning_rate": 9.563012520662773e-06, + "loss": 0.2935143709182739, + "step": 2180 + }, + { + "epoch": 1.077597924131966, + "grad_norm": 1.2902152081672096, + "learning_rate": 9.554849880287103e-06, + "loss": 0.26728200912475586, + "step": 2181 + }, + { + "epoch": 1.0780921784258002, + "grad_norm": 1.4327778934971358, + "learning_rate": 9.546687537087287e-06, + "loss": 0.2558351159095764, + "step": 2182 + }, + { + "epoch": 1.0785864327196342, + "grad_norm": 1.133861673349663, + "learning_rate": 9.538525496512394e-06, + "loss": 0.2517240047454834, + "step": 2183 + }, + { + "epoch": 1.0790806870134684, + "grad_norm": 
1.1033603168250732, + "learning_rate": 9.53036376401129e-06, + "loss": 0.23258647322654724, + "step": 2184 + }, + { + "epoch": 1.0795749413073026, + "grad_norm": 1.2016172891455823, + "learning_rate": 9.522202345032627e-06, + "loss": 0.24100016057491302, + "step": 2185 + }, + { + "epoch": 1.0800691956011368, + "grad_norm": 1.1844138198826075, + "learning_rate": 9.51404124502485e-06, + "loss": 0.27807697653770447, + "step": 2186 + }, + { + "epoch": 1.080563449894971, + "grad_norm": 1.2045646158236256, + "learning_rate": 9.50588046943619e-06, + "loss": 0.26146867871284485, + "step": 2187 + }, + { + "epoch": 1.0810577041888052, + "grad_norm": 1.3792610621050578, + "learning_rate": 9.497720023714675e-06, + "loss": 0.28570955991744995, + "step": 2188 + }, + { + "epoch": 1.0815519584826394, + "grad_norm": 1.146591161630138, + "learning_rate": 9.489559913308092e-06, + "loss": 0.22583246231079102, + "step": 2189 + }, + { + "epoch": 1.0820462127764734, + "grad_norm": 1.2292468406383597, + "learning_rate": 9.48140014366402e-06, + "loss": 0.27526232600212097, + "step": 2190 + }, + { + "epoch": 1.0825404670703076, + "grad_norm": 1.287410242270342, + "learning_rate": 9.473240720229803e-06, + "loss": 0.2777514159679413, + "step": 2191 + }, + { + "epoch": 1.0830347213641418, + "grad_norm": 1.217692620890676, + "learning_rate": 9.465081648452549e-06, + "loss": 0.25767001509666443, + "step": 2192 + }, + { + "epoch": 1.083528975657976, + "grad_norm": 1.2401214064051047, + "learning_rate": 9.456922933779148e-06, + "loss": 0.24114865064620972, + "step": 2193 + }, + { + "epoch": 1.0840232299518102, + "grad_norm": 1.3343620945353547, + "learning_rate": 9.448764581656237e-06, + "loss": 0.31198200583457947, + "step": 2194 + }, + { + "epoch": 1.0845174842456444, + "grad_norm": 1.2865355942160217, + "learning_rate": 9.440606597530213e-06, + "loss": 0.2724478840827942, + "step": 2195 + }, + { + "epoch": 1.0850117385394786, + "grad_norm": 1.2982367761916904, + "learning_rate": 
9.432448986847229e-06, + "loss": 0.27796900272369385, + "step": 2196 + }, + { + "epoch": 1.0855059928333128, + "grad_norm": 1.293883522594156, + "learning_rate": 9.424291755053198e-06, + "loss": 0.2877587676048279, + "step": 2197 + }, + { + "epoch": 1.0860002471271468, + "grad_norm": 1.354561961211439, + "learning_rate": 9.416134907593764e-06, + "loss": 0.2898337244987488, + "step": 2198 + }, + { + "epoch": 1.086494501420981, + "grad_norm": 1.2931825621227928, + "learning_rate": 9.407978449914322e-06, + "loss": 0.2544672191143036, + "step": 2199 + }, + { + "epoch": 1.0869887557148152, + "grad_norm": 1.2905943399481439, + "learning_rate": 9.399822387460005e-06, + "loss": 0.28336071968078613, + "step": 2200 + }, + { + "epoch": 1.0874830100086494, + "grad_norm": 1.2871287196611743, + "learning_rate": 9.391666725675691e-06, + "loss": 0.2862734794616699, + "step": 2201 + }, + { + "epoch": 1.0879772643024836, + "grad_norm": 1.386969000020192, + "learning_rate": 9.383511470005978e-06, + "loss": 0.26331260800361633, + "step": 2202 + }, + { + "epoch": 1.0884715185963179, + "grad_norm": 1.2750467510922643, + "learning_rate": 9.375356625895201e-06, + "loss": 0.30087417364120483, + "step": 2203 + }, + { + "epoch": 1.088965772890152, + "grad_norm": 1.3434362766675538, + "learning_rate": 9.36720219878741e-06, + "loss": 0.2736594080924988, + "step": 2204 + }, + { + "epoch": 1.089460027183986, + "grad_norm": 1.4852243291487657, + "learning_rate": 9.359048194126395e-06, + "loss": 0.2704418897628784, + "step": 2205 + }, + { + "epoch": 1.0899542814778203, + "grad_norm": 1.2230094225693318, + "learning_rate": 9.350894617355645e-06, + "loss": 0.24540236592292786, + "step": 2206 + }, + { + "epoch": 1.0904485357716545, + "grad_norm": 1.2299505503288506, + "learning_rate": 9.342741473918375e-06, + "loss": 0.26376527547836304, + "step": 2207 + }, + { + "epoch": 1.0909427900654887, + "grad_norm": 1.0803859595224048, + "learning_rate": 9.334588769257502e-06, + "loss": 0.24062004685401917, + 
"step": 2208 + }, + { + "epoch": 1.0914370443593229, + "grad_norm": 1.1443970874822365, + "learning_rate": 9.326436508815662e-06, + "loss": 0.24209418892860413, + "step": 2209 + }, + { + "epoch": 1.091931298653157, + "grad_norm": 1.3414968412819865, + "learning_rate": 9.318284698035188e-06, + "loss": 0.2732285261154175, + "step": 2210 + }, + { + "epoch": 1.0924255529469913, + "grad_norm": 1.2470429271312866, + "learning_rate": 9.310133342358106e-06, + "loss": 0.2684158980846405, + "step": 2211 + }, + { + "epoch": 1.0929198072408255, + "grad_norm": 1.1035267199988392, + "learning_rate": 9.301982447226145e-06, + "loss": 0.22511601448059082, + "step": 2212 + }, + { + "epoch": 1.0934140615346597, + "grad_norm": 1.165505029883992, + "learning_rate": 9.293832018080731e-06, + "loss": 0.2622867226600647, + "step": 2213 + }, + { + "epoch": 1.0939083158284937, + "grad_norm": 1.2923685951682604, + "learning_rate": 9.285682060362974e-06, + "loss": 0.3030891418457031, + "step": 2214 + }, + { + "epoch": 1.094402570122328, + "grad_norm": 1.2523210407583818, + "learning_rate": 9.277532579513666e-06, + "loss": 0.24928592145442963, + "step": 2215 + }, + { + "epoch": 1.094896824416162, + "grad_norm": 1.2048717570746186, + "learning_rate": 9.269383580973285e-06, + "loss": 0.2588339149951935, + "step": 2216 + }, + { + "epoch": 1.0953910787099963, + "grad_norm": 1.2427748942142012, + "learning_rate": 9.261235070181983e-06, + "loss": 0.2587873339653015, + "step": 2217 + }, + { + "epoch": 1.0958853330038305, + "grad_norm": 1.3192410250632676, + "learning_rate": 9.253087052579596e-06, + "loss": 0.29420971870422363, + "step": 2218 + }, + { + "epoch": 1.0963795872976647, + "grad_norm": 1.1714489078180652, + "learning_rate": 9.244939533605619e-06, + "loss": 0.25384342670440674, + "step": 2219 + }, + { + "epoch": 1.096873841591499, + "grad_norm": 1.2208998726962157, + "learning_rate": 9.236792518699224e-06, + "loss": 0.23133251070976257, + "step": 2220 + }, + { + "epoch": 1.097368095885333, + 
"grad_norm": 1.1919788928879418, + "learning_rate": 9.228646013299233e-06, + "loss": 0.26196008920669556, + "step": 2221 + }, + { + "epoch": 1.0978623501791671, + "grad_norm": 1.345065700534229, + "learning_rate": 9.220500022844144e-06, + "loss": 0.2567690908908844, + "step": 2222 + }, + { + "epoch": 1.0983566044730013, + "grad_norm": 1.1808254692787845, + "learning_rate": 9.212354552772107e-06, + "loss": 0.2555367350578308, + "step": 2223 + }, + { + "epoch": 1.0988508587668355, + "grad_norm": 1.1544608952675586, + "learning_rate": 9.204209608520913e-06, + "loss": 0.24357245862483978, + "step": 2224 + }, + { + "epoch": 1.0993451130606697, + "grad_norm": 1.3367524689374175, + "learning_rate": 9.19606519552801e-06, + "loss": 0.2792712450027466, + "step": 2225 + }, + { + "epoch": 1.099839367354504, + "grad_norm": 1.3277136329189279, + "learning_rate": 9.1879213192305e-06, + "loss": 0.29090794920921326, + "step": 2226 + }, + { + "epoch": 1.1003336216483381, + "grad_norm": 1.304360721279056, + "learning_rate": 9.179777985065115e-06, + "loss": 0.2777528762817383, + "step": 2227 + }, + { + "epoch": 1.1008278759421724, + "grad_norm": 1.1781995191131436, + "learning_rate": 9.171635198468227e-06, + "loss": 0.263868123292923, + "step": 2228 + }, + { + "epoch": 1.1013221302360063, + "grad_norm": 1.184942105326879, + "learning_rate": 9.16349296487584e-06, + "loss": 0.24118748307228088, + "step": 2229 + }, + { + "epoch": 1.1018163845298405, + "grad_norm": 1.2411255946822906, + "learning_rate": 9.155351289723603e-06, + "loss": 0.2176896631717682, + "step": 2230 + }, + { + "epoch": 1.1023106388236747, + "grad_norm": 1.3759218504425914, + "learning_rate": 9.147210178446776e-06, + "loss": 0.24727840721607208, + "step": 2231 + }, + { + "epoch": 1.102804893117509, + "grad_norm": 1.287783002848043, + "learning_rate": 9.139069636480247e-06, + "loss": 0.2711295783519745, + "step": 2232 + }, + { + "epoch": 1.1032991474113432, + "grad_norm": 1.2808604096079383, + "learning_rate": 
9.130929669258525e-06, + "loss": 0.2987736165523529, + "step": 2233 + }, + { + "epoch": 1.1037934017051774, + "grad_norm": 1.3771259989337001, + "learning_rate": 9.122790282215743e-06, + "loss": 0.2773835062980652, + "step": 2234 + }, + { + "epoch": 1.1042876559990116, + "grad_norm": 1.2299830744412572, + "learning_rate": 9.114651480785632e-06, + "loss": 0.29417523741722107, + "step": 2235 + }, + { + "epoch": 1.1047819102928456, + "grad_norm": 1.377692958442212, + "learning_rate": 9.106513270401545e-06, + "loss": 0.2642611265182495, + "step": 2236 + }, + { + "epoch": 1.1052761645866798, + "grad_norm": 1.2764125735134089, + "learning_rate": 9.098375656496434e-06, + "loss": 0.2789427638053894, + "step": 2237 + }, + { + "epoch": 1.105770418880514, + "grad_norm": 1.3238778744589295, + "learning_rate": 9.090238644502845e-06, + "loss": 0.3002237379550934, + "step": 2238 + }, + { + "epoch": 1.1062646731743482, + "grad_norm": 1.1862434874371655, + "learning_rate": 9.082102239852942e-06, + "loss": 0.27620676159858704, + "step": 2239 + }, + { + "epoch": 1.1067589274681824, + "grad_norm": 1.327009037228036, + "learning_rate": 9.07396644797847e-06, + "loss": 0.26718735694885254, + "step": 2240 + }, + { + "epoch": 1.1072531817620166, + "grad_norm": 1.3581828145326202, + "learning_rate": 9.065831274310763e-06, + "loss": 0.27443817257881165, + "step": 2241 + }, + { + "epoch": 1.1077474360558508, + "grad_norm": 1.2348189100714968, + "learning_rate": 9.057696724280748e-06, + "loss": 0.2536284923553467, + "step": 2242 + }, + { + "epoch": 1.108241690349685, + "grad_norm": 1.274876240899672, + "learning_rate": 9.049562803318942e-06, + "loss": 0.2583077549934387, + "step": 2243 + }, + { + "epoch": 1.108735944643519, + "grad_norm": 1.2591915779147578, + "learning_rate": 9.041429516855427e-06, + "loss": 0.2696278393268585, + "step": 2244 + }, + { + "epoch": 1.1092301989373532, + "grad_norm": 1.4248240108913692, + "learning_rate": 9.033296870319868e-06, + "loss": 0.2966364622116089, + 
"step": 2245 + }, + { + "epoch": 1.1097244532311874, + "grad_norm": 1.1050822330716321, + "learning_rate": 9.025164869141503e-06, + "loss": 0.22690679132938385, + "step": 2246 + }, + { + "epoch": 1.1102187075250216, + "grad_norm": 1.192560579016723, + "learning_rate": 9.017033518749147e-06, + "loss": 0.2777915894985199, + "step": 2247 + }, + { + "epoch": 1.1107129618188558, + "grad_norm": 1.3394858504136318, + "learning_rate": 9.008902824571168e-06, + "loss": 0.2890303134918213, + "step": 2248 + }, + { + "epoch": 1.11120721611269, + "grad_norm": 1.0426463189164805, + "learning_rate": 9.000772792035505e-06, + "loss": 0.22669392824172974, + "step": 2249 + }, + { + "epoch": 1.1117014704065242, + "grad_norm": 1.1970809485558533, + "learning_rate": 8.992643426569643e-06, + "loss": 0.26416563987731934, + "step": 2250 + }, + { + "epoch": 1.1121957247003584, + "grad_norm": 1.1888202892832207, + "learning_rate": 8.984514733600641e-06, + "loss": 0.2745298147201538, + "step": 2251 + }, + { + "epoch": 1.1126899789941924, + "grad_norm": 1.3798693264357922, + "learning_rate": 8.97638671855509e-06, + "loss": 0.31175684928894043, + "step": 2252 + }, + { + "epoch": 1.1131842332880266, + "grad_norm": 1.1626887122886307, + "learning_rate": 8.968259386859146e-06, + "loss": 0.2632657289505005, + "step": 2253 + }, + { + "epoch": 1.1136784875818608, + "grad_norm": 1.810662888324155, + "learning_rate": 8.960132743938485e-06, + "loss": 0.25820252299308777, + "step": 2254 + }, + { + "epoch": 1.114172741875695, + "grad_norm": 1.061521514088085, + "learning_rate": 8.95200679521835e-06, + "loss": 0.24255456030368805, + "step": 2255 + }, + { + "epoch": 1.1146669961695292, + "grad_norm": 1.2696759740581753, + "learning_rate": 8.943881546123506e-06, + "loss": 0.2973442077636719, + "step": 2256 + }, + { + "epoch": 1.1151612504633635, + "grad_norm": 1.1336353694819978, + "learning_rate": 8.935757002078252e-06, + "loss": 0.23320606350898743, + "step": 2257 + }, + { + "epoch": 1.1156555047571977, + 
"grad_norm": 1.275444057796017, + "learning_rate": 8.927633168506415e-06, + "loss": 0.2923268675804138, + "step": 2258 + }, + { + "epoch": 1.1161497590510319, + "grad_norm": 1.25496425665649, + "learning_rate": 8.91951005083135e-06, + "loss": 0.25932425260543823, + "step": 2259 + }, + { + "epoch": 1.1166440133448658, + "grad_norm": 1.2215943645090854, + "learning_rate": 8.911387654475943e-06, + "loss": 0.2631821036338806, + "step": 2260 + }, + { + "epoch": 1.1171382676387, + "grad_norm": 1.226020936236602, + "learning_rate": 8.903265984862581e-06, + "loss": 0.24741420149803162, + "step": 2261 + }, + { + "epoch": 1.1176325219325343, + "grad_norm": 1.165036984102613, + "learning_rate": 8.895145047413178e-06, + "loss": 0.2593516707420349, + "step": 2262 + }, + { + "epoch": 1.1181267762263685, + "grad_norm": 1.2132388690590856, + "learning_rate": 8.88702484754915e-06, + "loss": 0.22109609842300415, + "step": 2263 + }, + { + "epoch": 1.1186210305202027, + "grad_norm": 1.242512673005374, + "learning_rate": 8.878905390691437e-06, + "loss": 0.24363039433956146, + "step": 2264 + }, + { + "epoch": 1.1191152848140369, + "grad_norm": 1.210365574835302, + "learning_rate": 8.870786682260465e-06, + "loss": 0.2507505714893341, + "step": 2265 + }, + { + "epoch": 1.119609539107871, + "grad_norm": 1.3229609964254254, + "learning_rate": 8.86266872767617e-06, + "loss": 0.303046315908432, + "step": 2266 + }, + { + "epoch": 1.120103793401705, + "grad_norm": 1.282548473383847, + "learning_rate": 8.854551532357977e-06, + "loss": 0.257943332195282, + "step": 2267 + }, + { + "epoch": 1.1205980476955393, + "grad_norm": 1.2641740973335522, + "learning_rate": 8.84643510172482e-06, + "loss": 0.2697421610355377, + "step": 2268 + }, + { + "epoch": 1.1210923019893735, + "grad_norm": 1.126371134669409, + "learning_rate": 8.838319441195105e-06, + "loss": 0.20090234279632568, + "step": 2269 + }, + { + "epoch": 1.1215865562832077, + "grad_norm": 1.3584193930662543, + "learning_rate": 
8.830204556186736e-06, + "loss": 0.2714189887046814, + "step": 2270 + }, + { + "epoch": 1.122080810577042, + "grad_norm": 1.1168786328747864, + "learning_rate": 8.822090452117084e-06, + "loss": 0.23497477173805237, + "step": 2271 + }, + { + "epoch": 1.122575064870876, + "grad_norm": 1.3047944688196833, + "learning_rate": 8.81397713440302e-06, + "loss": 0.2582445740699768, + "step": 2272 + }, + { + "epoch": 1.1230693191647103, + "grad_norm": 1.2807794267280126, + "learning_rate": 8.805864608460876e-06, + "loss": 0.26494619250297546, + "step": 2273 + }, + { + "epoch": 1.1235635734585445, + "grad_norm": 1.3251515621500554, + "learning_rate": 8.797752879706455e-06, + "loss": 0.2767868936061859, + "step": 2274 + }, + { + "epoch": 1.1240578277523785, + "grad_norm": 1.5161646380346314, + "learning_rate": 8.789641953555032e-06, + "loss": 0.27696311473846436, + "step": 2275 + }, + { + "epoch": 1.1245520820462127, + "grad_norm": 1.3659389136687503, + "learning_rate": 8.78153183542135e-06, + "loss": 0.27048689126968384, + "step": 2276 + }, + { + "epoch": 1.125046336340047, + "grad_norm": 1.3893625373049876, + "learning_rate": 8.773422530719606e-06, + "loss": 0.2940211892127991, + "step": 2277 + }, + { + "epoch": 1.1255405906338811, + "grad_norm": 1.310212206650707, + "learning_rate": 8.765314044863453e-06, + "loss": 0.24859851598739624, + "step": 2278 + }, + { + "epoch": 1.1260348449277153, + "grad_norm": 1.3087530353150083, + "learning_rate": 8.757206383265998e-06, + "loss": 0.28879350423812866, + "step": 2279 + }, + { + "epoch": 1.1265290992215495, + "grad_norm": 1.2514534154786532, + "learning_rate": 8.74909955133981e-06, + "loss": 0.24804209172725677, + "step": 2280 + }, + { + "epoch": 1.1270233535153837, + "grad_norm": 1.3358056447173947, + "learning_rate": 8.740993554496886e-06, + "loss": 0.3199496567249298, + "step": 2281 + }, + { + "epoch": 1.1275176078092177, + "grad_norm": 2.15705729620974, + "learning_rate": 8.732888398148678e-06, + "loss": 0.3098929524421692, + 
"step": 2282 + }, + { + "epoch": 1.128011862103052, + "grad_norm": 1.2048730778866592, + "learning_rate": 8.724784087706067e-06, + "loss": 0.21280749142169952, + "step": 2283 + }, + { + "epoch": 1.1285061163968861, + "grad_norm": 1.1819530781050969, + "learning_rate": 8.716680628579382e-06, + "loss": 0.25330856442451477, + "step": 2284 + }, + { + "epoch": 1.1290003706907203, + "grad_norm": 1.2218083349938962, + "learning_rate": 8.708578026178371e-06, + "loss": 0.26141977310180664, + "step": 2285 + }, + { + "epoch": 1.1294946249845546, + "grad_norm": 1.3085311775335164, + "learning_rate": 8.700476285912219e-06, + "loss": 0.2529010772705078, + "step": 2286 + }, + { + "epoch": 1.1299888792783888, + "grad_norm": 1.4496496993285695, + "learning_rate": 8.69237541318953e-06, + "loss": 0.2662504315376282, + "step": 2287 + }, + { + "epoch": 1.130483133572223, + "grad_norm": 1.2797233255982605, + "learning_rate": 8.684275413418329e-06, + "loss": 0.2724575996398926, + "step": 2288 + }, + { + "epoch": 1.1309773878660572, + "grad_norm": 1.2524016016810007, + "learning_rate": 8.676176292006065e-06, + "loss": 0.2820962965488434, + "step": 2289 + }, + { + "epoch": 1.1314716421598914, + "grad_norm": 1.2157522787611978, + "learning_rate": 8.668078054359595e-06, + "loss": 0.2594743072986603, + "step": 2290 + }, + { + "epoch": 1.1319658964537254, + "grad_norm": 1.1017631552140204, + "learning_rate": 8.659980705885183e-06, + "loss": 0.25397709012031555, + "step": 2291 + }, + { + "epoch": 1.1324601507475596, + "grad_norm": 1.3505914192645034, + "learning_rate": 8.651884251988503e-06, + "loss": 0.27261337637901306, + "step": 2292 + }, + { + "epoch": 1.1329544050413938, + "grad_norm": 1.191460472235454, + "learning_rate": 8.643788698074638e-06, + "loss": 0.2726992070674896, + "step": 2293 + }, + { + "epoch": 1.133448659335228, + "grad_norm": 1.2175895117879216, + "learning_rate": 8.635694049548058e-06, + "loss": 0.2792774438858032, + "step": 2294 + }, + { + "epoch": 1.1339429136290622, + 
"grad_norm": 1.272860546351146, + "learning_rate": 8.627600311812638e-06, + "loss": 0.310885488986969, + "step": 2295 + }, + { + "epoch": 1.1344371679228964, + "grad_norm": 1.2747295027163217, + "learning_rate": 8.619507490271638e-06, + "loss": 0.27060413360595703, + "step": 2296 + }, + { + "epoch": 1.1349314222167306, + "grad_norm": 1.2507140444567972, + "learning_rate": 8.611415590327718e-06, + "loss": 0.27069440484046936, + "step": 2297 + }, + { + "epoch": 1.1354256765105646, + "grad_norm": 1.2299186955801236, + "learning_rate": 8.603324617382905e-06, + "loss": 0.2790459990501404, + "step": 2298 + }, + { + "epoch": 1.1359199308043988, + "grad_norm": 1.2813816772493964, + "learning_rate": 8.595234576838624e-06, + "loss": 0.27170947194099426, + "step": 2299 + }, + { + "epoch": 1.136414185098233, + "grad_norm": 1.1903279302585759, + "learning_rate": 8.587145474095665e-06, + "loss": 0.25313863158226013, + "step": 2300 + }, + { + "epoch": 1.1369084393920672, + "grad_norm": 1.2968469055543796, + "learning_rate": 8.5790573145542e-06, + "loss": 0.289467990398407, + "step": 2301 + }, + { + "epoch": 1.1374026936859014, + "grad_norm": 1.3141096348522086, + "learning_rate": 8.570970103613774e-06, + "loss": 0.29796460270881653, + "step": 2302 + }, + { + "epoch": 1.1378969479797356, + "grad_norm": 1.2855551342619271, + "learning_rate": 8.562883846673286e-06, + "loss": 0.27264270186424255, + "step": 2303 + }, + { + "epoch": 1.1383912022735698, + "grad_norm": 1.2243974310235655, + "learning_rate": 8.554798549131005e-06, + "loss": 0.3099757134914398, + "step": 2304 + }, + { + "epoch": 1.138885456567404, + "grad_norm": 1.2936181628424743, + "learning_rate": 8.546714216384565e-06, + "loss": 0.30002498626708984, + "step": 2305 + }, + { + "epoch": 1.139379710861238, + "grad_norm": 1.7617864884936485, + "learning_rate": 8.538630853830951e-06, + "loss": 0.2428818643093109, + "step": 2306 + }, + { + "epoch": 1.1398739651550722, + "grad_norm": 1.24686983002664, + "learning_rate": 
8.530548466866497e-06, + "loss": 0.2601294219493866, + "step": 2307 + }, + { + "epoch": 1.1403682194489064, + "grad_norm": 1.2066765531591284, + "learning_rate": 8.522467060886888e-06, + "loss": 0.23878628015518188, + "step": 2308 + }, + { + "epoch": 1.1408624737427406, + "grad_norm": 1.345733709932402, + "learning_rate": 8.514386641287163e-06, + "loss": 0.2780643403530121, + "step": 2309 + }, + { + "epoch": 1.1413567280365748, + "grad_norm": 1.2756115099724787, + "learning_rate": 8.506307213461689e-06, + "loss": 0.29834824800491333, + "step": 2310 + }, + { + "epoch": 1.141850982330409, + "grad_norm": 1.3376095615389103, + "learning_rate": 8.498228782804175e-06, + "loss": 0.2733996510505676, + "step": 2311 + }, + { + "epoch": 1.1423452366242433, + "grad_norm": 1.3063802509871558, + "learning_rate": 8.490151354707669e-06, + "loss": 0.2524843215942383, + "step": 2312 + }, + { + "epoch": 1.1428394909180772, + "grad_norm": 1.2776723106689647, + "learning_rate": 8.482074934564543e-06, + "loss": 0.29077857732772827, + "step": 2313 + }, + { + "epoch": 1.1433337452119114, + "grad_norm": 1.2114776729729342, + "learning_rate": 8.473999527766503e-06, + "loss": 0.25935155153274536, + "step": 2314 + }, + { + "epoch": 1.1438279995057457, + "grad_norm": 1.3166365920869918, + "learning_rate": 8.465925139704578e-06, + "loss": 0.23595012724399567, + "step": 2315 + }, + { + "epoch": 1.1443222537995799, + "grad_norm": 1.2268504419293456, + "learning_rate": 8.457851775769108e-06, + "loss": 0.25193360447883606, + "step": 2316 + }, + { + "epoch": 1.144816508093414, + "grad_norm": 1.2847886622034916, + "learning_rate": 8.449779441349755e-06, + "loss": 0.26844412088394165, + "step": 2317 + }, + { + "epoch": 1.1453107623872483, + "grad_norm": 1.2550831674884213, + "learning_rate": 8.441708141835499e-06, + "loss": 0.2507320046424866, + "step": 2318 + }, + { + "epoch": 1.1458050166810825, + "grad_norm": 1.31186920690482, + "learning_rate": 8.433637882614624e-06, + "loss": 0.2756047248840332, 
+ "step": 2319 + }, + { + "epoch": 1.1462992709749167, + "grad_norm": 1.3818376930568548, + "learning_rate": 8.425568669074717e-06, + "loss": 0.3136482536792755, + "step": 2320 + }, + { + "epoch": 1.146793525268751, + "grad_norm": 1.3094285230006764, + "learning_rate": 8.417500506602668e-06, + "loss": 0.25975438952445984, + "step": 2321 + }, + { + "epoch": 1.1472877795625849, + "grad_norm": 1.3148310008881885, + "learning_rate": 8.409433400584674e-06, + "loss": 0.2524915039539337, + "step": 2322 + }, + { + "epoch": 1.147782033856419, + "grad_norm": 1.316055955366049, + "learning_rate": 8.401367356406214e-06, + "loss": 0.2731180787086487, + "step": 2323 + }, + { + "epoch": 1.1482762881502533, + "grad_norm": 1.4277670811350172, + "learning_rate": 8.393302379452065e-06, + "loss": 0.27752095460891724, + "step": 2324 + }, + { + "epoch": 1.1487705424440875, + "grad_norm": 1.2586766809004215, + "learning_rate": 8.385238475106287e-06, + "loss": 0.269240140914917, + "step": 2325 + }, + { + "epoch": 1.1492647967379217, + "grad_norm": 1.301058586916402, + "learning_rate": 8.377175648752236e-06, + "loss": 0.2668418288230896, + "step": 2326 + }, + { + "epoch": 1.149759051031756, + "grad_norm": 1.2869179599070777, + "learning_rate": 8.369113905772532e-06, + "loss": 0.29276758432388306, + "step": 2327 + }, + { + "epoch": 1.15025330532559, + "grad_norm": 1.240170388592341, + "learning_rate": 8.361053251549083e-06, + "loss": 0.26562872529029846, + "step": 2328 + }, + { + "epoch": 1.150747559619424, + "grad_norm": 1.2907483203574122, + "learning_rate": 8.352993691463063e-06, + "loss": 0.257779061794281, + "step": 2329 + }, + { + "epoch": 1.1512418139132583, + "grad_norm": 1.3761256870332743, + "learning_rate": 8.344935230894926e-06, + "loss": 0.2871868312358856, + "step": 2330 + }, + { + "epoch": 1.1517360682070925, + "grad_norm": 1.2766304490065612, + "learning_rate": 8.336877875224379e-06, + "loss": 0.25191348791122437, + "step": 2331 + }, + { + "epoch": 1.1522303225009267, + 
"grad_norm": 1.1532415542893881, + "learning_rate": 8.3288216298304e-06, + "loss": 0.27057239413261414, + "step": 2332 + }, + { + "epoch": 1.152724576794761, + "grad_norm": 1.3903855220327628, + "learning_rate": 8.32076650009122e-06, + "loss": 0.31574326753616333, + "step": 2333 + }, + { + "epoch": 1.1532188310885951, + "grad_norm": 1.5549371484345924, + "learning_rate": 8.312712491384332e-06, + "loss": 0.22503693401813507, + "step": 2334 + }, + { + "epoch": 1.1537130853824293, + "grad_norm": 1.2363735263099107, + "learning_rate": 8.304659609086478e-06, + "loss": 0.25754863023757935, + "step": 2335 + }, + { + "epoch": 1.1542073396762635, + "grad_norm": 1.1790773293013888, + "learning_rate": 8.296607858573646e-06, + "loss": 0.24367934465408325, + "step": 2336 + }, + { + "epoch": 1.1547015939700975, + "grad_norm": 1.2792693896599328, + "learning_rate": 8.288557245221068e-06, + "loss": 0.28907084465026855, + "step": 2337 + }, + { + "epoch": 1.1551958482639317, + "grad_norm": 1.1852966795691644, + "learning_rate": 8.280507774403217e-06, + "loss": 0.24526283144950867, + "step": 2338 + }, + { + "epoch": 1.155690102557766, + "grad_norm": 1.1473094958169556, + "learning_rate": 8.272459451493811e-06, + "loss": 0.21968787908554077, + "step": 2339 + }, + { + "epoch": 1.1561843568516001, + "grad_norm": 1.1613080622383485, + "learning_rate": 8.264412281865791e-06, + "loss": 0.23803061246871948, + "step": 2340 + }, + { + "epoch": 1.1566786111454344, + "grad_norm": 1.2818425210270699, + "learning_rate": 8.256366270891335e-06, + "loss": 0.25715917348861694, + "step": 2341 + }, + { + "epoch": 1.1571728654392686, + "grad_norm": 1.300939575113673, + "learning_rate": 8.248321423941836e-06, + "loss": 0.29443520307540894, + "step": 2342 + }, + { + "epoch": 1.1576671197331028, + "grad_norm": 1.2224332053171705, + "learning_rate": 8.240277746387934e-06, + "loss": 0.24904949963092804, + "step": 2343 + }, + { + "epoch": 1.1581613740269368, + "grad_norm": 1.2866663921835886, + 
"learning_rate": 8.23223524359946e-06, + "loss": 0.2594628632068634, + "step": 2344 + }, + { + "epoch": 1.158655628320771, + "grad_norm": 1.2731058113968243, + "learning_rate": 8.224193920945482e-06, + "loss": 0.23853302001953125, + "step": 2345 + }, + { + "epoch": 1.1591498826146052, + "grad_norm": 1.3394742959570003, + "learning_rate": 8.216153783794266e-06, + "loss": 0.25465112924575806, + "step": 2346 + }, + { + "epoch": 1.1596441369084394, + "grad_norm": 1.3135301213887383, + "learning_rate": 8.208114837513297e-06, + "loss": 0.28038230538368225, + "step": 2347 + }, + { + "epoch": 1.1601383912022736, + "grad_norm": 1.350685866794537, + "learning_rate": 8.200077087469262e-06, + "loss": 0.3144591450691223, + "step": 2348 + }, + { + "epoch": 1.1606326454961078, + "grad_norm": 1.281224607522297, + "learning_rate": 8.192040539028047e-06, + "loss": 0.25782787799835205, + "step": 2349 + }, + { + "epoch": 1.161126899789942, + "grad_norm": 1.5124699254380607, + "learning_rate": 8.18400519755473e-06, + "loss": 0.21928566694259644, + "step": 2350 + }, + { + "epoch": 1.1616211540837762, + "grad_norm": 1.2617101773123074, + "learning_rate": 8.175971068413598e-06, + "loss": 0.2277221381664276, + "step": 2351 + }, + { + "epoch": 1.1621154083776104, + "grad_norm": 1.3465952359588251, + "learning_rate": 8.16793815696812e-06, + "loss": 0.26971378922462463, + "step": 2352 + }, + { + "epoch": 1.1626096626714444, + "grad_norm": 1.352802202139023, + "learning_rate": 8.15990646858095e-06, + "loss": 0.26448535919189453, + "step": 2353 + }, + { + "epoch": 1.1631039169652786, + "grad_norm": 1.3091049684475664, + "learning_rate": 8.151876008613927e-06, + "loss": 0.26372095942497253, + "step": 2354 + }, + { + "epoch": 1.1635981712591128, + "grad_norm": 1.3450938198850664, + "learning_rate": 8.143846782428078e-06, + "loss": 0.2594243288040161, + "step": 2355 + }, + { + "epoch": 1.164092425552947, + "grad_norm": 1.2377171543356333, + "learning_rate": 8.135818795383597e-06, + "loss": 
0.23994986712932587, + "step": 2356 + }, + { + "epoch": 1.1645866798467812, + "grad_norm": 1.2983017697862052, + "learning_rate": 8.12779205283985e-06, + "loss": 0.2746032476425171, + "step": 2357 + }, + { + "epoch": 1.1650809341406154, + "grad_norm": 1.3938993958898265, + "learning_rate": 8.119766560155377e-06, + "loss": 0.3323846161365509, + "step": 2358 + }, + { + "epoch": 1.1655751884344494, + "grad_norm": 1.3890076094482564, + "learning_rate": 8.111742322687886e-06, + "loss": 0.28155508637428284, + "step": 2359 + }, + { + "epoch": 1.1660694427282836, + "grad_norm": 1.361844276882708, + "learning_rate": 8.103719345794237e-06, + "loss": 0.2936748266220093, + "step": 2360 + }, + { + "epoch": 1.1665636970221178, + "grad_norm": 1.2168650482731003, + "learning_rate": 8.095697634830463e-06, + "loss": 0.23575282096862793, + "step": 2361 + }, + { + "epoch": 1.167057951315952, + "grad_norm": 1.277845029620416, + "learning_rate": 8.087677195151737e-06, + "loss": 0.24547496438026428, + "step": 2362 + }, + { + "epoch": 1.1675522056097862, + "grad_norm": 1.3371291006512767, + "learning_rate": 8.079658032112388e-06, + "loss": 0.2936372458934784, + "step": 2363 + }, + { + "epoch": 1.1680464599036204, + "grad_norm": 1.316297337509115, + "learning_rate": 8.071640151065902e-06, + "loss": 0.28602418303489685, + "step": 2364 + }, + { + "epoch": 1.1685407141974546, + "grad_norm": 1.271542457187923, + "learning_rate": 8.0636235573649e-06, + "loss": 0.2742761969566345, + "step": 2365 + }, + { + "epoch": 1.1690349684912889, + "grad_norm": 1.2379702024007857, + "learning_rate": 8.05560825636114e-06, + "loss": 0.2590268552303314, + "step": 2366 + }, + { + "epoch": 1.169529222785123, + "grad_norm": 1.2195835846594238, + "learning_rate": 8.047594253405525e-06, + "loss": 0.26881399750709534, + "step": 2367 + }, + { + "epoch": 1.170023477078957, + "grad_norm": 1.279205613064969, + "learning_rate": 8.039581553848093e-06, + "loss": 0.27069953083992004, + "step": 2368 + }, + { + "epoch": 
1.1705177313727912, + "grad_norm": 1.1650094541250327, + "learning_rate": 8.031570163038005e-06, + "loss": 0.27320611476898193, + "step": 2369 + }, + { + "epoch": 1.1710119856666255, + "grad_norm": 1.289507742767465, + "learning_rate": 8.023560086323548e-06, + "loss": 0.26400327682495117, + "step": 2370 + }, + { + "epoch": 1.1715062399604597, + "grad_norm": 1.1403608861276666, + "learning_rate": 8.015551329052136e-06, + "loss": 0.22287744283676147, + "step": 2371 + }, + { + "epoch": 1.1720004942542939, + "grad_norm": 1.2409841787965832, + "learning_rate": 8.007543896570309e-06, + "loss": 0.28240424394607544, + "step": 2372 + }, + { + "epoch": 1.172494748548128, + "grad_norm": 1.3414402473623117, + "learning_rate": 7.999537794223702e-06, + "loss": 0.27119147777557373, + "step": 2373 + }, + { + "epoch": 1.1729890028419623, + "grad_norm": 1.376418134177551, + "learning_rate": 7.991533027357085e-06, + "loss": 0.2579900920391083, + "step": 2374 + }, + { + "epoch": 1.1734832571357963, + "grad_norm": 1.197547817498857, + "learning_rate": 7.983529601314317e-06, + "loss": 0.25550374388694763, + "step": 2375 + }, + { + "epoch": 1.1739775114296305, + "grad_norm": 1.119102387270249, + "learning_rate": 7.97552752143838e-06, + "loss": 0.21197429299354553, + "step": 2376 + }, + { + "epoch": 1.1744717657234647, + "grad_norm": 1.245296460371477, + "learning_rate": 7.96752679307134e-06, + "loss": 0.28724029660224915, + "step": 2377 + }, + { + "epoch": 1.1749660200172989, + "grad_norm": 1.119081251981291, + "learning_rate": 7.959527421554375e-06, + "loss": 0.24320468306541443, + "step": 2378 + }, + { + "epoch": 1.175460274311133, + "grad_norm": 1.1094352642608503, + "learning_rate": 7.951529412227745e-06, + "loss": 0.22487501800060272, + "step": 2379 + }, + { + "epoch": 1.1759545286049673, + "grad_norm": 1.1424975538486684, + "learning_rate": 7.943532770430811e-06, + "loss": 0.2754969894886017, + "step": 2380 + }, + { + "epoch": 1.1764487828988015, + "grad_norm": 1.2424832323819373, 
+ "learning_rate": 7.93553750150202e-06, + "loss": 0.2734825909137726, + "step": 2381 + }, + { + "epoch": 1.1769430371926357, + "grad_norm": 1.3311172796502668, + "learning_rate": 7.927543610778895e-06, + "loss": 0.2803332209587097, + "step": 2382 + }, + { + "epoch": 1.1774372914864697, + "grad_norm": 1.3572589379934268, + "learning_rate": 7.919551103598037e-06, + "loss": 0.2820316255092621, + "step": 2383 + }, + { + "epoch": 1.177931545780304, + "grad_norm": 1.1984541262238777, + "learning_rate": 7.911559985295142e-06, + "loss": 0.26788315176963806, + "step": 2384 + }, + { + "epoch": 1.178425800074138, + "grad_norm": 1.152974420484647, + "learning_rate": 7.90357026120496e-06, + "loss": 0.2562825083732605, + "step": 2385 + }, + { + "epoch": 1.1789200543679723, + "grad_norm": 1.3733272776027918, + "learning_rate": 7.895581936661316e-06, + "loss": 0.28260675072669983, + "step": 2386 + }, + { + "epoch": 1.1794143086618065, + "grad_norm": 1.2509507258139472, + "learning_rate": 7.887595016997105e-06, + "loss": 0.25887200236320496, + "step": 2387 + }, + { + "epoch": 1.1799085629556407, + "grad_norm": 1.1852436756934879, + "learning_rate": 7.879609507544274e-06, + "loss": 0.2351648062467575, + "step": 2388 + }, + { + "epoch": 1.180402817249475, + "grad_norm": 1.310528017980178, + "learning_rate": 7.871625413633843e-06, + "loss": 0.2958889305591583, + "step": 2389 + }, + { + "epoch": 1.180897071543309, + "grad_norm": 1.260660594043313, + "learning_rate": 7.863642740595873e-06, + "loss": 0.29704710841178894, + "step": 2390 + }, + { + "epoch": 1.1813913258371431, + "grad_norm": 1.1273593973839822, + "learning_rate": 7.855661493759488e-06, + "loss": 0.23283210396766663, + "step": 2391 + }, + { + "epoch": 1.1818855801309773, + "grad_norm": 1.1497387573049556, + "learning_rate": 7.847681678452846e-06, + "loss": 0.22818870842456818, + "step": 2392 + }, + { + "epoch": 1.1823798344248115, + "grad_norm": 1.2334848445567106, + "learning_rate": 7.839703300003163e-06, + "loss": 
0.2345077246427536, + "step": 2393 + }, + { + "epoch": 1.1828740887186457, + "grad_norm": 1.3979127898652413, + "learning_rate": 7.831726363736694e-06, + "loss": 0.31161409616470337, + "step": 2394 + }, + { + "epoch": 1.18336834301248, + "grad_norm": 1.3157666615230723, + "learning_rate": 7.823750874978724e-06, + "loss": 0.2958439588546753, + "step": 2395 + }, + { + "epoch": 1.1838625973063142, + "grad_norm": 1.1914805532137183, + "learning_rate": 7.815776839053568e-06, + "loss": 0.24895446002483368, + "step": 2396 + }, + { + "epoch": 1.1843568516001484, + "grad_norm": 1.189611866561264, + "learning_rate": 7.807804261284591e-06, + "loss": 0.2691795825958252, + "step": 2397 + }, + { + "epoch": 1.1848511058939826, + "grad_norm": 1.2282823509277643, + "learning_rate": 7.799833146994165e-06, + "loss": 0.26797783374786377, + "step": 2398 + }, + { + "epoch": 1.1853453601878166, + "grad_norm": 1.2297499766268158, + "learning_rate": 7.791863501503694e-06, + "loss": 0.2665610611438751, + "step": 2399 + }, + { + "epoch": 1.1858396144816508, + "grad_norm": 1.1290863581864232, + "learning_rate": 7.783895330133596e-06, + "loss": 0.24712792038917542, + "step": 2400 + }, + { + "epoch": 1.186333868775485, + "grad_norm": 1.2300895404986125, + "learning_rate": 7.775928638203316e-06, + "loss": 0.24131645262241364, + "step": 2401 + }, + { + "epoch": 1.1868281230693192, + "grad_norm": 1.2566198414342145, + "learning_rate": 7.7679634310313e-06, + "loss": 0.24233923852443695, + "step": 2402 + }, + { + "epoch": 1.1873223773631534, + "grad_norm": 1.2397915401139883, + "learning_rate": 7.759999713935002e-06, + "loss": 0.24929150938987732, + "step": 2403 + }, + { + "epoch": 1.1878166316569876, + "grad_norm": 1.2005274695814647, + "learning_rate": 7.752037492230887e-06, + "loss": 0.266767293214798, + "step": 2404 + }, + { + "epoch": 1.1883108859508218, + "grad_norm": 1.2083997342227277, + "learning_rate": 7.744076771234427e-06, + "loss": 0.257263720035553, + "step": 2405 + }, + { + "epoch": 
1.1888051402446558, + "grad_norm": 1.3017758985808945, + "learning_rate": 7.73611755626008e-06, + "loss": 0.26949891448020935, + "step": 2406 + }, + { + "epoch": 1.18929939453849, + "grad_norm": 1.3523825920294412, + "learning_rate": 7.728159852621308e-06, + "loss": 0.250274121761322, + "step": 2407 + }, + { + "epoch": 1.1897936488323242, + "grad_norm": 1.6370645689880403, + "learning_rate": 7.720203665630553e-06, + "loss": 0.2442864030599594, + "step": 2408 + }, + { + "epoch": 1.1902879031261584, + "grad_norm": 1.4258170868908235, + "learning_rate": 7.71224900059926e-06, + "loss": 0.273416131734848, + "step": 2409 + }, + { + "epoch": 1.1907821574199926, + "grad_norm": 1.2547538223250059, + "learning_rate": 7.704295862837845e-06, + "loss": 0.2559645175933838, + "step": 2410 + }, + { + "epoch": 1.1912764117138268, + "grad_norm": 1.3439078919148493, + "learning_rate": 7.696344257655713e-06, + "loss": 0.2793371379375458, + "step": 2411 + }, + { + "epoch": 1.191770666007661, + "grad_norm": 1.1661216324600743, + "learning_rate": 7.688394190361235e-06, + "loss": 0.23739437758922577, + "step": 2412 + }, + { + "epoch": 1.1922649203014952, + "grad_norm": 1.293132062594429, + "learning_rate": 7.680445666261766e-06, + "loss": 0.27027466893196106, + "step": 2413 + }, + { + "epoch": 1.1927591745953292, + "grad_norm": 1.2887121644516222, + "learning_rate": 7.672498690663632e-06, + "loss": 0.2641778886318207, + "step": 2414 + }, + { + "epoch": 1.1932534288891634, + "grad_norm": 1.235898023301149, + "learning_rate": 7.664553268872116e-06, + "loss": 0.25086820125579834, + "step": 2415 + }, + { + "epoch": 1.1937476831829976, + "grad_norm": 1.6761712741491541, + "learning_rate": 7.656609406191467e-06, + "loss": 0.2871254086494446, + "step": 2416 + }, + { + "epoch": 1.1942419374768318, + "grad_norm": 1.193500770631568, + "learning_rate": 7.648667107924893e-06, + "loss": 0.2657528221607208, + "step": 2417 + }, + { + "epoch": 1.194736191770666, + "grad_norm": 1.3739698225148846, + 
"learning_rate": 7.640726379374564e-06, + "loss": 0.26942694187164307, + "step": 2418 + }, + { + "epoch": 1.1952304460645002, + "grad_norm": 1.1561137180130854, + "learning_rate": 7.632787225841593e-06, + "loss": 0.23883840441703796, + "step": 2419 + }, + { + "epoch": 1.1957247003583344, + "grad_norm": 1.215726770348901, + "learning_rate": 7.624849652626049e-06, + "loss": 0.24837304651737213, + "step": 2420 + }, + { + "epoch": 1.1962189546521684, + "grad_norm": 1.194954932679119, + "learning_rate": 7.616913665026936e-06, + "loss": 0.2882450222969055, + "step": 2421 + }, + { + "epoch": 1.1967132089460026, + "grad_norm": 1.4557191034476904, + "learning_rate": 7.608979268342213e-06, + "loss": 0.25877460837364197, + "step": 2422 + }, + { + "epoch": 1.1972074632398368, + "grad_norm": 1.2343724838571453, + "learning_rate": 7.601046467868767e-06, + "loss": 0.26970750093460083, + "step": 2423 + }, + { + "epoch": 1.197701717533671, + "grad_norm": 1.1598747816375319, + "learning_rate": 7.593115268902423e-06, + "loss": 0.23771706223487854, + "step": 2424 + }, + { + "epoch": 1.1981959718275053, + "grad_norm": 1.1949187968831856, + "learning_rate": 7.585185676737932e-06, + "loss": 0.25420787930488586, + "step": 2425 + }, + { + "epoch": 1.1986902261213395, + "grad_norm": 1.248194263596005, + "learning_rate": 7.577257696668982e-06, + "loss": 0.2551025152206421, + "step": 2426 + }, + { + "epoch": 1.1991844804151737, + "grad_norm": 1.1913659485965633, + "learning_rate": 7.569331333988177e-06, + "loss": 0.2302972972393036, + "step": 2427 + }, + { + "epoch": 1.1996787347090079, + "grad_norm": 1.340176223566515, + "learning_rate": 7.561406593987045e-06, + "loss": 0.25811445713043213, + "step": 2428 + }, + { + "epoch": 1.200172989002842, + "grad_norm": 1.1946803554276415, + "learning_rate": 7.5534834819560235e-06, + "loss": 0.2550782561302185, + "step": 2429 + }, + { + "epoch": 1.200667243296676, + "grad_norm": 1.3588122473637638, + "learning_rate": 7.545562003184474e-06, + "loss": 
0.24825535714626312, + "step": 2430 + }, + { + "epoch": 1.2011614975905103, + "grad_norm": 1.3105140055807547, + "learning_rate": 7.537642162960664e-06, + "loss": 0.29703712463378906, + "step": 2431 + }, + { + "epoch": 1.2016557518843445, + "grad_norm": 1.2707072551305245, + "learning_rate": 7.5297239665717625e-06, + "loss": 0.26830747723579407, + "step": 2432 + }, + { + "epoch": 1.2021500061781787, + "grad_norm": 1.2272388404108225, + "learning_rate": 7.521807419303846e-06, + "loss": 0.2428341656923294, + "step": 2433 + }, + { + "epoch": 1.202644260472013, + "grad_norm": 1.3310573803274635, + "learning_rate": 7.513892526441883e-06, + "loss": 0.2843051552772522, + "step": 2434 + }, + { + "epoch": 1.203138514765847, + "grad_norm": 1.297091941411815, + "learning_rate": 7.50597929326975e-06, + "loss": 0.2485228031873703, + "step": 2435 + }, + { + "epoch": 1.203632769059681, + "grad_norm": 1.3716686006321661, + "learning_rate": 7.498067725070206e-06, + "loss": 0.25343626737594604, + "step": 2436 + }, + { + "epoch": 1.2041270233535153, + "grad_norm": 1.3197919626781558, + "learning_rate": 7.490157827124902e-06, + "loss": 0.24906575679779053, + "step": 2437 + }, + { + "epoch": 1.2046212776473495, + "grad_norm": 1.6398204697926184, + "learning_rate": 7.4822496047143665e-06, + "loss": 0.33576443791389465, + "step": 2438 + }, + { + "epoch": 1.2051155319411837, + "grad_norm": 1.341601959864184, + "learning_rate": 7.474343063118023e-06, + "loss": 0.2755683362483978, + "step": 2439 + }, + { + "epoch": 1.205609786235018, + "grad_norm": 1.259839098151577, + "learning_rate": 7.466438207614165e-06, + "loss": 0.2667745351791382, + "step": 2440 + }, + { + "epoch": 1.2061040405288521, + "grad_norm": 1.3942381323272646, + "learning_rate": 7.458535043479959e-06, + "loss": 0.2970271408557892, + "step": 2441 + }, + { + "epoch": 1.2065982948226863, + "grad_norm": 1.2934031608191798, + "learning_rate": 7.450633575991442e-06, + "loss": 0.2628048360347748, + "step": 2442 + }, + { + "epoch": 
1.2070925491165205, + "grad_norm": 1.3935428467061275, + "learning_rate": 7.442733810423526e-06, + "loss": 0.29923003911972046, + "step": 2443 + }, + { + "epoch": 1.2075868034103547, + "grad_norm": 1.2121764987473183, + "learning_rate": 7.4348357520499805e-06, + "loss": 0.2486419975757599, + "step": 2444 + }, + { + "epoch": 1.2080810577041887, + "grad_norm": 1.2651423288599317, + "learning_rate": 7.4269394061434315e-06, + "loss": 0.2711118459701538, + "step": 2445 + }, + { + "epoch": 1.208575311998023, + "grad_norm": 1.2689988235231109, + "learning_rate": 7.419044777975371e-06, + "loss": 0.2568815052509308, + "step": 2446 + }, + { + "epoch": 1.2090695662918571, + "grad_norm": 1.3357220203112758, + "learning_rate": 7.411151872816143e-06, + "loss": 0.2546462416648865, + "step": 2447 + }, + { + "epoch": 1.2095638205856913, + "grad_norm": 1.1716595202066384, + "learning_rate": 7.403260695934933e-06, + "loss": 0.23455393314361572, + "step": 2448 + }, + { + "epoch": 1.2100580748795255, + "grad_norm": 1.3263077198790523, + "learning_rate": 7.395371252599779e-06, + "loss": 0.2874235510826111, + "step": 2449 + }, + { + "epoch": 1.2105523291733598, + "grad_norm": 1.2319732877340805, + "learning_rate": 7.387483548077559e-06, + "loss": 0.2462289184331894, + "step": 2450 + }, + { + "epoch": 1.211046583467194, + "grad_norm": 1.381045021384348, + "learning_rate": 7.379597587633998e-06, + "loss": 0.29385364055633545, + "step": 2451 + }, + { + "epoch": 1.211540837761028, + "grad_norm": 1.1902133906710186, + "learning_rate": 7.371713376533642e-06, + "loss": 0.25049760937690735, + "step": 2452 + }, + { + "epoch": 1.2120350920548622, + "grad_norm": 1.267298470174844, + "learning_rate": 7.363830920039887e-06, + "loss": 0.2748974859714508, + "step": 2453 + }, + { + "epoch": 1.2125293463486964, + "grad_norm": 1.2929931198793703, + "learning_rate": 7.355950223414939e-06, + "loss": 0.2707570791244507, + "step": 2454 + }, + { + "epoch": 1.2130236006425306, + "grad_norm": 1.3328464163268134, 
+ "learning_rate": 7.3480712919198474e-06, + "loss": 0.2864024043083191, + "step": 2455 + }, + { + "epoch": 1.2135178549363648, + "grad_norm": 1.400259353784304, + "learning_rate": 7.340194130814466e-06, + "loss": 0.3181900680065155, + "step": 2456 + }, + { + "epoch": 1.214012109230199, + "grad_norm": 1.2994892273470056, + "learning_rate": 7.332318745357483e-06, + "loss": 0.3022974729537964, + "step": 2457 + }, + { + "epoch": 1.2145063635240332, + "grad_norm": 1.2350650698265369, + "learning_rate": 7.324445140806387e-06, + "loss": 0.2850461006164551, + "step": 2458 + }, + { + "epoch": 1.2150006178178674, + "grad_norm": 1.0534315857750147, + "learning_rate": 7.316573322417483e-06, + "loss": 0.21958643198013306, + "step": 2459 + }, + { + "epoch": 1.2154948721117014, + "grad_norm": 1.3531472648001939, + "learning_rate": 7.3087032954458915e-06, + "loss": 0.2517468333244324, + "step": 2460 + }, + { + "epoch": 1.2159891264055356, + "grad_norm": 1.1714370722498957, + "learning_rate": 7.300835065145526e-06, + "loss": 0.26957637071609497, + "step": 2461 + }, + { + "epoch": 1.2164833806993698, + "grad_norm": 1.2755586367674554, + "learning_rate": 7.292968636769103e-06, + "loss": 0.2699058949947357, + "step": 2462 + }, + { + "epoch": 1.216977634993204, + "grad_norm": 1.2382912705778586, + "learning_rate": 7.285104015568138e-06, + "loss": 0.25076431035995483, + "step": 2463 + }, + { + "epoch": 1.2174718892870382, + "grad_norm": 1.2104527847150177, + "learning_rate": 7.277241206792944e-06, + "loss": 0.24862724542617798, + "step": 2464 + }, + { + "epoch": 1.2179661435808724, + "grad_norm": 1.3107261919810722, + "learning_rate": 7.269380215692614e-06, + "loss": 0.27427712082862854, + "step": 2465 + }, + { + "epoch": 1.2184603978747066, + "grad_norm": 1.2946586839730188, + "learning_rate": 7.261521047515041e-06, + "loss": 0.24343061447143555, + "step": 2466 + }, + { + "epoch": 1.2189546521685406, + "grad_norm": 1.1968860231182823, + "learning_rate": 7.253663707506882e-06, + 
"loss": 0.25482866168022156, + "step": 2467 + }, + { + "epoch": 1.2194489064623748, + "grad_norm": 1.2806570256332481, + "learning_rate": 7.2458082009135964e-06, + "loss": 0.27699458599090576, + "step": 2468 + }, + { + "epoch": 1.219943160756209, + "grad_norm": 1.3000686730507884, + "learning_rate": 7.237954532979401e-06, + "loss": 0.26576149463653564, + "step": 2469 + }, + { + "epoch": 1.2204374150500432, + "grad_norm": 1.2984838025251157, + "learning_rate": 7.230102708947298e-06, + "loss": 0.287861168384552, + "step": 2470 + }, + { + "epoch": 1.2209316693438774, + "grad_norm": 1.2911534198412806, + "learning_rate": 7.2222527340590434e-06, + "loss": 0.25484874844551086, + "step": 2471 + }, + { + "epoch": 1.2214259236377116, + "grad_norm": 1.284847349415858, + "learning_rate": 7.214404613555177e-06, + "loss": 0.26371529698371887, + "step": 2472 + }, + { + "epoch": 1.2219201779315458, + "grad_norm": 1.334957534550205, + "learning_rate": 7.206558352674992e-06, + "loss": 0.23692578077316284, + "step": 2473 + }, + { + "epoch": 1.22241443222538, + "grad_norm": 1.2696744902236006, + "learning_rate": 7.198713956656538e-06, + "loss": 0.26369085907936096, + "step": 2474 + }, + { + "epoch": 1.2229086865192142, + "grad_norm": 1.4374683516439322, + "learning_rate": 7.1908714307366145e-06, + "loss": 0.260580450296402, + "step": 2475 + }, + { + "epoch": 1.2234029408130482, + "grad_norm": 1.280804641850837, + "learning_rate": 7.1830307801507904e-06, + "loss": 0.2693007290363312, + "step": 2476 + }, + { + "epoch": 1.2238971951068824, + "grad_norm": 1.3429546136121409, + "learning_rate": 7.1751920101333695e-06, + "loss": 0.26629775762557983, + "step": 2477 + }, + { + "epoch": 1.2243914494007166, + "grad_norm": 1.3999841706301799, + "learning_rate": 7.167355125917399e-06, + "loss": 0.2963234782218933, + "step": 2478 + }, + { + "epoch": 1.2248857036945509, + "grad_norm": 1.2332551275962955, + "learning_rate": 7.159520132734669e-06, + "loss": 0.24415187537670135, + "step": 2479 + }, + 
{ + "epoch": 1.225379957988385, + "grad_norm": 1.3645078601677985, + "learning_rate": 7.15168703581572e-06, + "loss": 0.2941599190235138, + "step": 2480 + }, + { + "epoch": 1.2258742122822193, + "grad_norm": 1.2551885597461083, + "learning_rate": 7.1438558403898065e-06, + "loss": 0.22807514667510986, + "step": 2481 + }, + { + "epoch": 1.2263684665760535, + "grad_norm": 1.3774209397395383, + "learning_rate": 7.136026551684923e-06, + "loss": 0.28865426778793335, + "step": 2482 + }, + { + "epoch": 1.2268627208698875, + "grad_norm": 1.3250195381886638, + "learning_rate": 7.1281991749277945e-06, + "loss": 0.3015780448913574, + "step": 2483 + }, + { + "epoch": 1.2273569751637217, + "grad_norm": 1.30264219696165, + "learning_rate": 7.12037371534386e-06, + "loss": 0.2521517872810364, + "step": 2484 + }, + { + "epoch": 1.2278512294575559, + "grad_norm": 1.520486974517902, + "learning_rate": 7.1125501781572896e-06, + "loss": 0.2904277443885803, + "step": 2485 + }, + { + "epoch": 1.22834548375139, + "grad_norm": 1.2434155494713983, + "learning_rate": 7.104728568590966e-06, + "loss": 0.26172375679016113, + "step": 2486 + }, + { + "epoch": 1.2288397380452243, + "grad_norm": 1.3588693705399504, + "learning_rate": 7.096908891866483e-06, + "loss": 0.23565448820590973, + "step": 2487 + }, + { + "epoch": 1.2293339923390585, + "grad_norm": 1.276833588621656, + "learning_rate": 7.0890911532041375e-06, + "loss": 0.2550106644630432, + "step": 2488 + }, + { + "epoch": 1.2298282466328927, + "grad_norm": 1.4167484141197517, + "learning_rate": 7.08127535782295e-06, + "loss": 0.3221823573112488, + "step": 2489 + }, + { + "epoch": 1.230322500926727, + "grad_norm": 1.2657124525427264, + "learning_rate": 7.073461510940631e-06, + "loss": 0.26209163665771484, + "step": 2490 + }, + { + "epoch": 1.2308167552205609, + "grad_norm": 1.3626305998908985, + "learning_rate": 7.06564961777359e-06, + "loss": 0.28635868430137634, + "step": 2491 + }, + { + "epoch": 1.231311009514395, + "grad_norm": 
1.417027138446056, + "learning_rate": 7.0578396835369355e-06, + "loss": 0.25630202889442444, + "step": 2492 + }, + { + "epoch": 1.2318052638082293, + "grad_norm": 1.233621488661494, + "learning_rate": 7.050031713444474e-06, + "loss": 0.27345454692840576, + "step": 2493 + }, + { + "epoch": 1.2322995181020635, + "grad_norm": 1.2592068756906736, + "learning_rate": 7.042225712708692e-06, + "loss": 0.2365841269493103, + "step": 2494 + }, + { + "epoch": 1.2327937723958977, + "grad_norm": 1.730933189967813, + "learning_rate": 7.03442168654076e-06, + "loss": 0.2891104221343994, + "step": 2495 + }, + { + "epoch": 1.233288026689732, + "grad_norm": 1.3811266669598459, + "learning_rate": 7.026619640150534e-06, + "loss": 0.2713435888290405, + "step": 2496 + }, + { + "epoch": 1.2337822809835661, + "grad_norm": 1.3509192768016722, + "learning_rate": 7.018819578746557e-06, + "loss": 0.28552842140197754, + "step": 2497 + }, + { + "epoch": 1.2342765352774, + "grad_norm": 1.377186562637688, + "learning_rate": 7.011021507536031e-06, + "loss": 0.2731080949306488, + "step": 2498 + }, + { + "epoch": 1.2347707895712343, + "grad_norm": 1.1800591795719682, + "learning_rate": 7.003225431724841e-06, + "loss": 0.27373206615448, + "step": 2499 + }, + { + "epoch": 1.2352650438650685, + "grad_norm": 1.3197536250384188, + "learning_rate": 6.99543135651753e-06, + "loss": 0.24507245421409607, + "step": 2500 + }, + { + "epoch": 1.2357592981589027, + "grad_norm": 1.2680812543691635, + "learning_rate": 6.9876392871173205e-06, + "loss": 0.2653801739215851, + "step": 2501 + }, + { + "epoch": 1.236253552452737, + "grad_norm": 1.115227060544212, + "learning_rate": 6.979849228726079e-06, + "loss": 0.1929643303155899, + "step": 2502 + }, + { + "epoch": 1.2367478067465711, + "grad_norm": 1.330653204132735, + "learning_rate": 6.972061186544341e-06, + "loss": 0.2684918940067291, + "step": 2503 + }, + { + "epoch": 1.2372420610404053, + "grad_norm": 1.2129572179563677, + "learning_rate": 6.964275165771288e-06, + 
"loss": 0.23158729076385498, + "step": 2504 + }, + { + "epoch": 1.2377363153342396, + "grad_norm": 1.3192284190451669, + "learning_rate": 6.95649117160476e-06, + "loss": 0.24757611751556396, + "step": 2505 + }, + { + "epoch": 1.2382305696280738, + "grad_norm": 1.328208985585749, + "learning_rate": 6.9487092092412425e-06, + "loss": 0.2651844620704651, + "step": 2506 + }, + { + "epoch": 1.2387248239219077, + "grad_norm": 1.3550284074069674, + "learning_rate": 6.940929283875859e-06, + "loss": 0.26745620369911194, + "step": 2507 + }, + { + "epoch": 1.239219078215742, + "grad_norm": 1.2361002758783033, + "learning_rate": 6.933151400702374e-06, + "loss": 0.22088846564292908, + "step": 2508 + }, + { + "epoch": 1.2397133325095762, + "grad_norm": 1.2379679284464757, + "learning_rate": 6.925375564913193e-06, + "loss": 0.2662886381149292, + "step": 2509 + }, + { + "epoch": 1.2402075868034104, + "grad_norm": 1.3634625495618726, + "learning_rate": 6.917601781699357e-06, + "loss": 0.2691834270954132, + "step": 2510 + }, + { + "epoch": 1.2407018410972446, + "grad_norm": 1.1575744185130052, + "learning_rate": 6.909830056250527e-06, + "loss": 0.2110689878463745, + "step": 2511 + }, + { + "epoch": 1.2411960953910788, + "grad_norm": 1.2961548823459923, + "learning_rate": 6.902060393755001e-06, + "loss": 0.29281991720199585, + "step": 2512 + }, + { + "epoch": 1.2416903496849128, + "grad_norm": 1.2724295845366205, + "learning_rate": 6.894292799399688e-06, + "loss": 0.27409040927886963, + "step": 2513 + }, + { + "epoch": 1.242184603978747, + "grad_norm": 1.304980332058365, + "learning_rate": 6.886527278370131e-06, + "loss": 0.29440224170684814, + "step": 2514 + }, + { + "epoch": 1.2426788582725812, + "grad_norm": 1.1224782958445216, + "learning_rate": 6.878763835850475e-06, + "loss": 0.23107948899269104, + "step": 2515 + }, + { + "epoch": 1.2431731125664154, + "grad_norm": 1.55997556893969, + "learning_rate": 6.871002477023488e-06, + "loss": 0.2682652473449707, + "step": 2516 + }, + { + 
"epoch": 1.2436673668602496, + "grad_norm": 1.2329698948831815, + "learning_rate": 6.863243207070534e-06, + "loss": 0.2935982644557953, + "step": 2517 + }, + { + "epoch": 1.2441616211540838, + "grad_norm": 1.4373018605291157, + "learning_rate": 6.855486031171597e-06, + "loss": 0.29027625918388367, + "step": 2518 + }, + { + "epoch": 1.244655875447918, + "grad_norm": 1.2739101669235458, + "learning_rate": 6.84773095450526e-06, + "loss": 0.25107353925704956, + "step": 2519 + }, + { + "epoch": 1.2451501297417522, + "grad_norm": 1.2325888755211254, + "learning_rate": 6.839977982248697e-06, + "loss": 0.279231995344162, + "step": 2520 + }, + { + "epoch": 1.2456443840355864, + "grad_norm": 1.2006221660421637, + "learning_rate": 6.832227119577677e-06, + "loss": 0.2544802129268646, + "step": 2521 + }, + { + "epoch": 1.2461386383294204, + "grad_norm": 1.397981415575177, + "learning_rate": 6.824478371666573e-06, + "loss": 0.24365633726119995, + "step": 2522 + }, + { + "epoch": 1.2466328926232546, + "grad_norm": 1.1393524200353975, + "learning_rate": 6.816731743688336e-06, + "loss": 0.2673290967941284, + "step": 2523 + }, + { + "epoch": 1.2471271469170888, + "grad_norm": 1.284093438519867, + "learning_rate": 6.808987240814504e-06, + "loss": 0.23896455764770508, + "step": 2524 + }, + { + "epoch": 1.247621401210923, + "grad_norm": 1.200000168994301, + "learning_rate": 6.801244868215192e-06, + "loss": 0.23196406662464142, + "step": 2525 + }, + { + "epoch": 1.2481156555047572, + "grad_norm": 1.2289321548733863, + "learning_rate": 6.793504631059106e-06, + "loss": 0.24249708652496338, + "step": 2526 + }, + { + "epoch": 1.2486099097985914, + "grad_norm": 1.1511217069627229, + "learning_rate": 6.785766534513514e-06, + "loss": 0.2366780787706375, + "step": 2527 + }, + { + "epoch": 1.2491041640924256, + "grad_norm": 1.291146988373714, + "learning_rate": 6.778030583744254e-06, + "loss": 0.2615105211734772, + "step": 2528 + }, + { + "epoch": 1.2495984183862596, + "grad_norm": 
1.4688230831159943, + "learning_rate": 6.770296783915738e-06, + "loss": 0.29761314392089844, + "step": 2529 + }, + { + "epoch": 1.2500926726800938, + "grad_norm": 1.2928438568936322, + "learning_rate": 6.762565140190948e-06, + "loss": 0.25020867586135864, + "step": 2530 + }, + { + "epoch": 1.250586926973928, + "grad_norm": 1.3858962507108388, + "learning_rate": 6.754835657731409e-06, + "loss": 0.2716590166091919, + "step": 2531 + }, + { + "epoch": 1.2510811812677622, + "grad_norm": 1.4048062063243787, + "learning_rate": 6.747108341697221e-06, + "loss": 0.27042001485824585, + "step": 2532 + }, + { + "epoch": 1.2515754355615964, + "grad_norm": 1.3297085932201778, + "learning_rate": 6.739383197247023e-06, + "loss": 0.2659035325050354, + "step": 2533 + }, + { + "epoch": 1.2520696898554307, + "grad_norm": 1.3945414928963702, + "learning_rate": 6.731660229538014e-06, + "loss": 0.2803581655025482, + "step": 2534 + }, + { + "epoch": 1.2525639441492649, + "grad_norm": 1.1484885760506975, + "learning_rate": 6.723939443725938e-06, + "loss": 0.24422097206115723, + "step": 2535 + }, + { + "epoch": 1.253058198443099, + "grad_norm": 1.5676789145324774, + "learning_rate": 6.71622084496508e-06, + "loss": 0.30003631114959717, + "step": 2536 + }, + { + "epoch": 1.2535524527369333, + "grad_norm": 1.3207189074013763, + "learning_rate": 6.708504438408265e-06, + "loss": 0.25745317339897156, + "step": 2537 + }, + { + "epoch": 1.2540467070307673, + "grad_norm": 1.3298790802481242, + "learning_rate": 6.700790229206856e-06, + "loss": 0.27648618817329407, + "step": 2538 + }, + { + "epoch": 1.2545409613246015, + "grad_norm": 1.2910375745243117, + "learning_rate": 6.6930782225107536e-06, + "loss": 0.2579975724220276, + "step": 2539 + }, + { + "epoch": 1.2550352156184357, + "grad_norm": 1.3321333943034437, + "learning_rate": 6.68536842346838e-06, + "loss": 0.2806825637817383, + "step": 2540 + }, + { + "epoch": 1.2555294699122699, + "grad_norm": 1.5211080365897773, + "learning_rate": 
6.677660837226685e-06, + "loss": 0.2641657888889313, + "step": 2541 + }, + { + "epoch": 1.256023724206104, + "grad_norm": 1.3170844434659201, + "learning_rate": 6.669955468931142e-06, + "loss": 0.25483542680740356, + "step": 2542 + }, + { + "epoch": 1.2565179784999383, + "grad_norm": 1.438596032878092, + "learning_rate": 6.662252323725751e-06, + "loss": 0.264334112405777, + "step": 2543 + }, + { + "epoch": 1.2570122327937723, + "grad_norm": 1.2825942587632855, + "learning_rate": 6.654551406753017e-06, + "loss": 0.2541567385196686, + "step": 2544 + }, + { + "epoch": 1.2575064870876065, + "grad_norm": 1.3007868833040497, + "learning_rate": 6.646852723153965e-06, + "loss": 0.2695424258708954, + "step": 2545 + }, + { + "epoch": 1.2580007413814407, + "grad_norm": 1.2114763710946868, + "learning_rate": 6.63915627806812e-06, + "loss": 0.2694344222545624, + "step": 2546 + }, + { + "epoch": 1.258494995675275, + "grad_norm": 1.3203626104751756, + "learning_rate": 6.631462076633527e-06, + "loss": 0.2695961892604828, + "step": 2547 + }, + { + "epoch": 1.258989249969109, + "grad_norm": 1.43655166025842, + "learning_rate": 6.623770123986719e-06, + "loss": 0.26878753304481506, + "step": 2548 + }, + { + "epoch": 1.2594835042629433, + "grad_norm": 1.4117532208090406, + "learning_rate": 6.616080425262738e-06, + "loss": 0.27568501234054565, + "step": 2549 + }, + { + "epoch": 1.2599777585567775, + "grad_norm": 1.4407785281346286, + "learning_rate": 6.608392985595111e-06, + "loss": 0.2991989254951477, + "step": 2550 + }, + { + "epoch": 1.2604720128506117, + "grad_norm": 1.2938769852574108, + "learning_rate": 6.600707810115869e-06, + "loss": 0.21832239627838135, + "step": 2551 + }, + { + "epoch": 1.260966267144446, + "grad_norm": 1.3528768023288296, + "learning_rate": 6.593024903955525e-06, + "loss": 0.2671685516834259, + "step": 2552 + }, + { + "epoch": 1.26146052143828, + "grad_norm": 1.142061359022944, + "learning_rate": 6.585344272243073e-06, + "loss": 0.23399557173252106, + "step": 
2553 + }, + { + "epoch": 1.2619547757321141, + "grad_norm": 1.3000899404630435, + "learning_rate": 6.577665920105996e-06, + "loss": 0.2701990008354187, + "step": 2554 + }, + { + "epoch": 1.2624490300259483, + "grad_norm": 1.216581780326655, + "learning_rate": 6.56998985267025e-06, + "loss": 0.2679189145565033, + "step": 2555 + }, + { + "epoch": 1.2629432843197825, + "grad_norm": 1.3457541131318878, + "learning_rate": 6.562316075060272e-06, + "loss": 0.2597065567970276, + "step": 2556 + }, + { + "epoch": 1.2634375386136167, + "grad_norm": 1.3732680167208262, + "learning_rate": 6.554644592398962e-06, + "loss": 0.2942010462284088, + "step": 2557 + }, + { + "epoch": 1.263931792907451, + "grad_norm": 1.2654921757837638, + "learning_rate": 6.546975409807696e-06, + "loss": 0.2547098994255066, + "step": 2558 + }, + { + "epoch": 1.264426047201285, + "grad_norm": 1.29416806058113, + "learning_rate": 6.539308532406306e-06, + "loss": 0.2779114246368408, + "step": 2559 + }, + { + "epoch": 1.2649203014951191, + "grad_norm": 1.2525651200835928, + "learning_rate": 6.531643965313093e-06, + "loss": 0.22318917512893677, + "step": 2560 + }, + { + "epoch": 1.2654145557889533, + "grad_norm": 1.2931765026229116, + "learning_rate": 6.523981713644814e-06, + "loss": 0.25439128279685974, + "step": 2561 + }, + { + "epoch": 1.2659088100827876, + "grad_norm": 1.1946536852540512, + "learning_rate": 6.516321782516677e-06, + "loss": 0.2317974865436554, + "step": 2562 + }, + { + "epoch": 1.2664030643766218, + "grad_norm": 1.3517228291780166, + "learning_rate": 6.508664177042339e-06, + "loss": 0.273223876953125, + "step": 2563 + }, + { + "epoch": 1.266897318670456, + "grad_norm": 1.3767500694886763, + "learning_rate": 6.501008902333912e-06, + "loss": 0.28408509492874146, + "step": 2564 + }, + { + "epoch": 1.2673915729642902, + "grad_norm": 1.4378995512233899, + "learning_rate": 6.493355963501951e-06, + "loss": 0.2702238857746124, + "step": 2565 + }, + { + "epoch": 1.2678858272581244, + "grad_norm": 
1.2819637354130675, + "learning_rate": 6.485705365655441e-06, + "loss": 0.2142164558172226, + "step": 2566 + }, + { + "epoch": 1.2683800815519586, + "grad_norm": 1.4108385899794438, + "learning_rate": 6.478057113901817e-06, + "loss": 0.2654300928115845, + "step": 2567 + }, + { + "epoch": 1.2688743358457928, + "grad_norm": 1.1724627648861543, + "learning_rate": 6.470411213346941e-06, + "loss": 0.24601367115974426, + "step": 2568 + }, + { + "epoch": 1.2693685901396268, + "grad_norm": 1.36613316910106, + "learning_rate": 6.462767669095109e-06, + "loss": 0.26201942563056946, + "step": 2569 + }, + { + "epoch": 1.269862844433461, + "grad_norm": 1.342399065083916, + "learning_rate": 6.455126486249038e-06, + "loss": 0.2839587926864624, + "step": 2570 + }, + { + "epoch": 1.2703570987272952, + "grad_norm": 1.2538564056049797, + "learning_rate": 6.447487669909873e-06, + "loss": 0.21100708842277527, + "step": 2571 + }, + { + "epoch": 1.2708513530211294, + "grad_norm": 1.1457223195177177, + "learning_rate": 6.439851225177185e-06, + "loss": 0.2181582748889923, + "step": 2572 + }, + { + "epoch": 1.2713456073149636, + "grad_norm": 1.397761306307691, + "learning_rate": 6.432217157148948e-06, + "loss": 0.29196488857269287, + "step": 2573 + }, + { + "epoch": 1.2718398616087978, + "grad_norm": 1.3664440708479575, + "learning_rate": 6.424585470921563e-06, + "loss": 0.2365931123495102, + "step": 2574 + }, + { + "epoch": 1.2723341159026318, + "grad_norm": 1.3496940412150429, + "learning_rate": 6.4169561715898255e-06, + "loss": 0.2277393937110901, + "step": 2575 + }, + { + "epoch": 1.272828370196466, + "grad_norm": 1.3624051718280268, + "learning_rate": 6.409329264246956e-06, + "loss": 0.25285032391548157, + "step": 2576 + }, + { + "epoch": 1.2733226244903002, + "grad_norm": 1.2632390853508073, + "learning_rate": 6.401704753984563e-06, + "loss": 0.253650963306427, + "step": 2577 + }, + { + "epoch": 1.2738168787841344, + "grad_norm": 1.264245223392645, + "learning_rate": 
6.394082645892668e-06, + "loss": 0.22143784165382385, + "step": 2578 + }, + { + "epoch": 1.2743111330779686, + "grad_norm": 1.3283739907286298, + "learning_rate": 6.3864629450596696e-06, + "loss": 0.27591395378112793, + "step": 2579 + }, + { + "epoch": 1.2748053873718028, + "grad_norm": 1.6236594986793635, + "learning_rate": 6.37884565657238e-06, + "loss": 0.32865333557128906, + "step": 2580 + }, + { + "epoch": 1.275299641665637, + "grad_norm": 1.2172019661301716, + "learning_rate": 6.371230785515992e-06, + "loss": 0.2743702530860901, + "step": 2581 + }, + { + "epoch": 1.2757938959594712, + "grad_norm": 1.2586352823219396, + "learning_rate": 6.3636183369740845e-06, + "loss": 0.23967956006526947, + "step": 2582 + }, + { + "epoch": 1.2762881502533054, + "grad_norm": 1.206746025741565, + "learning_rate": 6.356008316028614e-06, + "loss": 0.2474803626537323, + "step": 2583 + }, + { + "epoch": 1.2767824045471394, + "grad_norm": 1.2591134604976273, + "learning_rate": 6.348400727759925e-06, + "loss": 0.2523267865180969, + "step": 2584 + }, + { + "epoch": 1.2772766588409736, + "grad_norm": 1.3690385191668641, + "learning_rate": 6.340795577246738e-06, + "loss": 0.2549436092376709, + "step": 2585 + }, + { + "epoch": 1.2777709131348078, + "grad_norm": 1.309885921175695, + "learning_rate": 6.333192869566138e-06, + "loss": 0.2602443993091583, + "step": 2586 + }, + { + "epoch": 1.278265167428642, + "grad_norm": 1.248955873440961, + "learning_rate": 6.325592609793588e-06, + "loss": 0.22912462055683136, + "step": 2587 + }, + { + "epoch": 1.2787594217224763, + "grad_norm": 1.3253843576578603, + "learning_rate": 6.317994803002907e-06, + "loss": 0.3004158139228821, + "step": 2588 + }, + { + "epoch": 1.2792536760163105, + "grad_norm": 1.2054603629919527, + "learning_rate": 6.310399454266289e-06, + "loss": 0.25851407647132874, + "step": 2589 + }, + { + "epoch": 1.2797479303101444, + "grad_norm": 1.2857681683589963, + "learning_rate": 6.302806568654277e-06, + "loss": 0.24637526273727417, 
+ "step": 2590 + }, + { + "epoch": 1.2802421846039787, + "grad_norm": 1.2976312908550238, + "learning_rate": 6.295216151235774e-06, + "loss": 0.26500213146209717, + "step": 2591 + }, + { + "epoch": 1.2807364388978129, + "grad_norm": 1.2103490895138174, + "learning_rate": 6.287628207078031e-06, + "loss": 0.24276241660118103, + "step": 2592 + }, + { + "epoch": 1.281230693191647, + "grad_norm": 2.3839558822188787, + "learning_rate": 6.280042741246655e-06, + "loss": 0.27117204666137695, + "step": 2593 + }, + { + "epoch": 1.2817249474854813, + "grad_norm": 1.4461368742366545, + "learning_rate": 6.272459758805596e-06, + "loss": 0.29287856817245483, + "step": 2594 + }, + { + "epoch": 1.2822192017793155, + "grad_norm": 1.4301387064569637, + "learning_rate": 6.26487926481714e-06, + "loss": 0.3065788149833679, + "step": 2595 + }, + { + "epoch": 1.2827134560731497, + "grad_norm": 1.3198078410588965, + "learning_rate": 6.257301264341915e-06, + "loss": 0.2738455533981323, + "step": 2596 + }, + { + "epoch": 1.283207710366984, + "grad_norm": 1.5398007848288653, + "learning_rate": 6.2497257624388915e-06, + "loss": 0.24216318130493164, + "step": 2597 + }, + { + "epoch": 1.283701964660818, + "grad_norm": 1.2565420891983292, + "learning_rate": 6.242152764165368e-06, + "loss": 0.276785671710968, + "step": 2598 + }, + { + "epoch": 1.2841962189546523, + "grad_norm": 1.2307015932000853, + "learning_rate": 6.234582274576961e-06, + "loss": 0.24999365210533142, + "step": 2599 + }, + { + "epoch": 1.2846904732484863, + "grad_norm": 1.2824145770644522, + "learning_rate": 6.227014298727627e-06, + "loss": 0.27714112401008606, + "step": 2600 + }, + { + "epoch": 1.2851847275423205, + "grad_norm": 1.2260344372038856, + "learning_rate": 6.219448841669639e-06, + "loss": 0.2422318160533905, + "step": 2601 + }, + { + "epoch": 1.2856789818361547, + "grad_norm": 1.3255802725159413, + "learning_rate": 6.21188590845359e-06, + "loss": 0.26688697934150696, + "step": 2602 + }, + { + "epoch": 
1.286173236129989, + "grad_norm": 1.2753676961687272, + "learning_rate": 6.204325504128379e-06, + "loss": 0.256889671087265, + "step": 2603 + }, + { + "epoch": 1.2866674904238231, + "grad_norm": 1.3013140965176258, + "learning_rate": 6.196767633741225e-06, + "loss": 0.27372461557388306, + "step": 2604 + }, + { + "epoch": 1.287161744717657, + "grad_norm": 1.3064762941978003, + "learning_rate": 6.189212302337663e-06, + "loss": 0.25194403529167175, + "step": 2605 + }, + { + "epoch": 1.2876559990114913, + "grad_norm": 1.2533511197404907, + "learning_rate": 6.181659514961515e-06, + "loss": 0.24381688237190247, + "step": 2606 + }, + { + "epoch": 1.2881502533053255, + "grad_norm": 1.2987400887924563, + "learning_rate": 6.17410927665492e-06, + "loss": 0.255805104970932, + "step": 2607 + }, + { + "epoch": 1.2886445075991597, + "grad_norm": 1.270289405479379, + "learning_rate": 6.166561592458307e-06, + "loss": 0.25070682168006897, + "step": 2608 + }, + { + "epoch": 1.289138761892994, + "grad_norm": 1.1954868388063873, + "learning_rate": 6.159016467410397e-06, + "loss": 0.24080060422420502, + "step": 2609 + }, + { + "epoch": 1.2896330161868281, + "grad_norm": 1.3524298235557053, + "learning_rate": 6.151473906548215e-06, + "loss": 0.28041762113571167, + "step": 2610 + }, + { + "epoch": 1.2901272704806623, + "grad_norm": 1.3891353799265191, + "learning_rate": 6.143933914907065e-06, + "loss": 0.2624273896217346, + "step": 2611 + }, + { + "epoch": 1.2906215247744965, + "grad_norm": 1.3838932352032651, + "learning_rate": 6.136396497520536e-06, + "loss": 0.2658112049102783, + "step": 2612 + }, + { + "epoch": 1.2911157790683307, + "grad_norm": 1.3103712430992434, + "learning_rate": 6.1288616594205e-06, + "loss": 0.27714237570762634, + "step": 2613 + }, + { + "epoch": 1.291610033362165, + "grad_norm": 1.2276105048536776, + "learning_rate": 6.121329405637111e-06, + "loss": 0.23253153264522552, + "step": 2614 + }, + { + "epoch": 1.292104287655999, + "grad_norm": 1.2168125400378236, + 
"learning_rate": 6.1137997411987915e-06, + "loss": 0.2438409924507141, + "step": 2615 + }, + { + "epoch": 1.2925985419498331, + "grad_norm": 1.3814066274151728, + "learning_rate": 6.106272671132236e-06, + "loss": 0.24013856053352356, + "step": 2616 + }, + { + "epoch": 1.2930927962436674, + "grad_norm": 1.4362282063831207, + "learning_rate": 6.098748200462408e-06, + "loss": 0.2850446403026581, + "step": 2617 + }, + { + "epoch": 1.2935870505375016, + "grad_norm": 1.3403873033762816, + "learning_rate": 6.0912263342125445e-06, + "loss": 0.22195187211036682, + "step": 2618 + }, + { + "epoch": 1.2940813048313358, + "grad_norm": 1.3701004376420556, + "learning_rate": 6.083707077404129e-06, + "loss": 0.29266390204429626, + "step": 2619 + }, + { + "epoch": 1.29457555912517, + "grad_norm": 1.2103981171479565, + "learning_rate": 6.076190435056913e-06, + "loss": 0.26741352677345276, + "step": 2620 + }, + { + "epoch": 1.295069813419004, + "grad_norm": 1.259544042020202, + "learning_rate": 6.068676412188892e-06, + "loss": 0.26014602184295654, + "step": 2621 + }, + { + "epoch": 1.2955640677128382, + "grad_norm": 1.2871395012144142, + "learning_rate": 6.061165013816333e-06, + "loss": 0.2561393976211548, + "step": 2622 + }, + { + "epoch": 1.2960583220066724, + "grad_norm": 1.312678751233067, + "learning_rate": 6.053656244953728e-06, + "loss": 0.2952851951122284, + "step": 2623 + }, + { + "epoch": 1.2965525763005066, + "grad_norm": 1.2817239432203538, + "learning_rate": 6.046150110613831e-06, + "loss": 0.2830423414707184, + "step": 2624 + }, + { + "epoch": 1.2970468305943408, + "grad_norm": 1.2514529269380406, + "learning_rate": 6.038646615807622e-06, + "loss": 0.22306497395038605, + "step": 2625 + }, + { + "epoch": 1.297541084888175, + "grad_norm": 1.3018072981213034, + "learning_rate": 6.031145765544333e-06, + "loss": 0.23291784524917603, + "step": 2626 + }, + { + "epoch": 1.2980353391820092, + "grad_norm": 1.3763927806121403, + "learning_rate": 6.023647564831425e-06, + "loss": 
0.2376563400030136, + "step": 2627 + }, + { + "epoch": 1.2985295934758434, + "grad_norm": 1.3283544756021872, + "learning_rate": 6.016152018674588e-06, + "loss": 0.2873516380786896, + "step": 2628 + }, + { + "epoch": 1.2990238477696776, + "grad_norm": 1.2475849952661122, + "learning_rate": 6.00865913207774e-06, + "loss": 0.2416999638080597, + "step": 2629 + }, + { + "epoch": 1.2995181020635118, + "grad_norm": 1.2254304075146119, + "learning_rate": 6.001168910043023e-06, + "loss": 0.2627726197242737, + "step": 2630 + }, + { + "epoch": 1.3000123563573458, + "grad_norm": 1.4025542210635493, + "learning_rate": 5.993681357570809e-06, + "loss": 0.25375279784202576, + "step": 2631 + }, + { + "epoch": 1.30050661065118, + "grad_norm": 1.3348797401747288, + "learning_rate": 5.986196479659676e-06, + "loss": 0.2853030562400818, + "step": 2632 + }, + { + "epoch": 1.3010008649450142, + "grad_norm": 1.3089867713489467, + "learning_rate": 5.978714281306425e-06, + "loss": 0.2626519501209259, + "step": 2633 + }, + { + "epoch": 1.3014951192388484, + "grad_norm": 1.4566011034207051, + "learning_rate": 5.971234767506057e-06, + "loss": 0.2895713448524475, + "step": 2634 + }, + { + "epoch": 1.3019893735326826, + "grad_norm": 1.2504104998957544, + "learning_rate": 5.9637579432518e-06, + "loss": 0.24617832899093628, + "step": 2635 + }, + { + "epoch": 1.3024836278265166, + "grad_norm": 1.2199824881911456, + "learning_rate": 5.956283813535066e-06, + "loss": 0.25497785210609436, + "step": 2636 + }, + { + "epoch": 1.3029778821203508, + "grad_norm": 1.3200409304272294, + "learning_rate": 5.948812383345484e-06, + "loss": 0.25832462310791016, + "step": 2637 + }, + { + "epoch": 1.303472136414185, + "grad_norm": 1.214232538768618, + "learning_rate": 5.941343657670866e-06, + "loss": 0.24273909628391266, + "step": 2638 + }, + { + "epoch": 1.3039663907080192, + "grad_norm": 1.2844572342866962, + "learning_rate": 5.933877641497232e-06, + "loss": 0.2668009400367737, + "step": 2639 + }, + { + "epoch": 
1.3044606450018534, + "grad_norm": 1.2388896928667246, + "learning_rate": 5.92641433980879e-06, + "loss": 0.2519373595714569, + "step": 2640 + }, + { + "epoch": 1.3049548992956876, + "grad_norm": 1.3760811135868023, + "learning_rate": 5.918953757587928e-06, + "loss": 0.30091768503189087, + "step": 2641 + }, + { + "epoch": 1.3054491535895218, + "grad_norm": 1.207587317973019, + "learning_rate": 5.911495899815225e-06, + "loss": 0.2504241466522217, + "step": 2642 + }, + { + "epoch": 1.305943407883356, + "grad_norm": 1.1902656490822856, + "learning_rate": 5.904040771469444e-06, + "loss": 0.24741190671920776, + "step": 2643 + }, + { + "epoch": 1.3064376621771903, + "grad_norm": 1.2559463008488698, + "learning_rate": 5.896588377527519e-06, + "loss": 0.2636350691318512, + "step": 2644 + }, + { + "epoch": 1.3069319164710245, + "grad_norm": 1.1981836589630794, + "learning_rate": 5.889138722964563e-06, + "loss": 0.22512421011924744, + "step": 2645 + }, + { + "epoch": 1.3074261707648585, + "grad_norm": 1.2451009493990417, + "learning_rate": 5.8816918127538546e-06, + "loss": 0.26447975635528564, + "step": 2646 + }, + { + "epoch": 1.3079204250586927, + "grad_norm": 1.1839899877527418, + "learning_rate": 5.874247651866853e-06, + "loss": 0.22084996104240417, + "step": 2647 + }, + { + "epoch": 1.3084146793525269, + "grad_norm": 1.2774730254159221, + "learning_rate": 5.8668062452731715e-06, + "loss": 0.24033552408218384, + "step": 2648 + }, + { + "epoch": 1.308908933646361, + "grad_norm": 1.305578072115893, + "learning_rate": 5.8593675979405795e-06, + "loss": 0.24829509854316711, + "step": 2649 + }, + { + "epoch": 1.3094031879401953, + "grad_norm": 1.3651670637998603, + "learning_rate": 5.851931714835016e-06, + "loss": 0.29011303186416626, + "step": 2650 + }, + { + "epoch": 1.3098974422340295, + "grad_norm": 1.3474001783390817, + "learning_rate": 5.8444986009205754e-06, + "loss": 0.273196280002594, + "step": 2651 + }, + { + "epoch": 1.3103916965278635, + "grad_norm": 
1.4905929158728624, + "learning_rate": 5.837068261159491e-06, + "loss": 0.28843480348587036, + "step": 2652 + }, + { + "epoch": 1.3108859508216977, + "grad_norm": 1.297768951304802, + "learning_rate": 5.829640700512159e-06, + "loss": 0.25919461250305176, + "step": 2653 + }, + { + "epoch": 1.3113802051155319, + "grad_norm": 1.4662917967499176, + "learning_rate": 5.822215923937105e-06, + "loss": 0.24588480591773987, + "step": 2654 + }, + { + "epoch": 1.311874459409366, + "grad_norm": 1.4533199098003418, + "learning_rate": 5.814793936391001e-06, + "loss": 0.26138943433761597, + "step": 2655 + }, + { + "epoch": 1.3123687137032003, + "grad_norm": 1.3892499103405112, + "learning_rate": 5.807374742828675e-06, + "loss": 0.2740943729877472, + "step": 2656 + }, + { + "epoch": 1.3128629679970345, + "grad_norm": 1.2689667055719156, + "learning_rate": 5.7999583482030605e-06, + "loss": 0.2307349294424057, + "step": 2657 + }, + { + "epoch": 1.3133572222908687, + "grad_norm": 1.4143938245126972, + "learning_rate": 5.792544757465242e-06, + "loss": 0.28424161672592163, + "step": 2658 + }, + { + "epoch": 1.313851476584703, + "grad_norm": 1.1640800807114133, + "learning_rate": 5.785133975564426e-06, + "loss": 0.2586106061935425, + "step": 2659 + }, + { + "epoch": 1.3143457308785371, + "grad_norm": 1.3677717868907802, + "learning_rate": 5.7777260074479455e-06, + "loss": 0.23268333077430725, + "step": 2660 + }, + { + "epoch": 1.314839985172371, + "grad_norm": 1.3455357811423037, + "learning_rate": 5.770320858061254e-06, + "loss": 0.22144779562950134, + "step": 2661 + }, + { + "epoch": 1.3153342394662053, + "grad_norm": 1.1539678539958322, + "learning_rate": 5.762918532347925e-06, + "loss": 0.2450334131717682, + "step": 2662 + }, + { + "epoch": 1.3158284937600395, + "grad_norm": 1.3018328480210146, + "learning_rate": 5.7555190352496375e-06, + "loss": 0.24483400583267212, + "step": 2663 + }, + { + "epoch": 1.3163227480538737, + "grad_norm": 1.484825672376601, + "learning_rate": 
5.748122371706198e-06, + "loss": 0.2590720057487488, + "step": 2664 + }, + { + "epoch": 1.316817002347708, + "grad_norm": 1.5800261617865896, + "learning_rate": 5.740728546655515e-06, + "loss": 0.27116847038269043, + "step": 2665 + }, + { + "epoch": 1.3173112566415421, + "grad_norm": 1.3133697295364004, + "learning_rate": 5.733337565033595e-06, + "loss": 0.2720273435115814, + "step": 2666 + }, + { + "epoch": 1.3178055109353761, + "grad_norm": 1.223977156924706, + "learning_rate": 5.7259494317745514e-06, + "loss": 0.22150173783302307, + "step": 2667 + }, + { + "epoch": 1.3182997652292103, + "grad_norm": 1.2771960781536442, + "learning_rate": 5.718564151810597e-06, + "loss": 0.27474984526634216, + "step": 2668 + }, + { + "epoch": 1.3187940195230445, + "grad_norm": 1.3886425317966573, + "learning_rate": 5.711181730072044e-06, + "loss": 0.2547265291213989, + "step": 2669 + }, + { + "epoch": 1.3192882738168787, + "grad_norm": 1.3822455459704068, + "learning_rate": 5.703802171487286e-06, + "loss": 0.2686036229133606, + "step": 2670 + }, + { + "epoch": 1.319782528110713, + "grad_norm": 1.2588694556349689, + "learning_rate": 5.696425480982814e-06, + "loss": 0.2276458591222763, + "step": 2671 + }, + { + "epoch": 1.3202767824045472, + "grad_norm": 1.3366284026803796, + "learning_rate": 5.6890516634832e-06, + "loss": 0.25005075335502625, + "step": 2672 + }, + { + "epoch": 1.3207710366983814, + "grad_norm": 1.4092724528348008, + "learning_rate": 5.681680723911104e-06, + "loss": 0.25919869542121887, + "step": 2673 + }, + { + "epoch": 1.3212652909922156, + "grad_norm": 1.3254224112633677, + "learning_rate": 5.6743126671872505e-06, + "loss": 0.2684757709503174, + "step": 2674 + }, + { + "epoch": 1.3217595452860498, + "grad_norm": 1.2529305606098464, + "learning_rate": 5.666947498230451e-06, + "loss": 0.2554991543292999, + "step": 2675 + }, + { + "epoch": 1.322253799579884, + "grad_norm": 1.3734571061597927, + "learning_rate": 5.6595852219575975e-06, + "loss": 0.27026665210723877, 
+ "step": 2676 + }, + { + "epoch": 1.322748053873718, + "grad_norm": 1.5029882994051502, + "learning_rate": 5.652225843283629e-06, + "loss": 0.3248092234134674, + "step": 2677 + }, + { + "epoch": 1.3232423081675522, + "grad_norm": 1.3299866785479277, + "learning_rate": 5.644869367121564e-06, + "loss": 0.2554503083229065, + "step": 2678 + }, + { + "epoch": 1.3237365624613864, + "grad_norm": 1.4099986774485116, + "learning_rate": 5.637515798382488e-06, + "loss": 0.25482693314552307, + "step": 2679 + }, + { + "epoch": 1.3242308167552206, + "grad_norm": 1.268292092612611, + "learning_rate": 5.630165141975523e-06, + "loss": 0.24664446711540222, + "step": 2680 + }, + { + "epoch": 1.3247250710490548, + "grad_norm": 1.3674712589344702, + "learning_rate": 5.622817402807879e-06, + "loss": 0.23855865001678467, + "step": 2681 + }, + { + "epoch": 1.325219325342889, + "grad_norm": 1.28659959156705, + "learning_rate": 5.615472585784796e-06, + "loss": 0.2847699820995331, + "step": 2682 + }, + { + "epoch": 1.325713579636723, + "grad_norm": 1.3902791844570088, + "learning_rate": 5.608130695809564e-06, + "loss": 0.2705647051334381, + "step": 2683 + }, + { + "epoch": 1.3262078339305572, + "grad_norm": 1.3726972299660716, + "learning_rate": 5.600791737783523e-06, + "loss": 0.30135318636894226, + "step": 2684 + }, + { + "epoch": 1.3267020882243914, + "grad_norm": 1.3006770767718296, + "learning_rate": 5.593455716606069e-06, + "loss": 0.261536180973053, + "step": 2685 + }, + { + "epoch": 1.3271963425182256, + "grad_norm": 1.2249107195075626, + "learning_rate": 5.586122637174614e-06, + "loss": 0.24006187915802002, + "step": 2686 + }, + { + "epoch": 1.3276905968120598, + "grad_norm": 1.2887498899635654, + "learning_rate": 5.578792504384618e-06, + "loss": 0.27928346395492554, + "step": 2687 + }, + { + "epoch": 1.328184851105894, + "grad_norm": 1.1715759673643904, + "learning_rate": 5.5714653231295745e-06, + "loss": 0.24134980142116547, + "step": 2688 + }, + { + "epoch": 1.3286791053997282, 
+ "grad_norm": 1.2633540397916776, + "learning_rate": 5.5641410983010055e-06, + "loss": 0.27914801239967346, + "step": 2689 + }, + { + "epoch": 1.3291733596935624, + "grad_norm": 1.6348254119913803, + "learning_rate": 5.55681983478846e-06, + "loss": 0.2735476493835449, + "step": 2690 + }, + { + "epoch": 1.3296676139873966, + "grad_norm": 1.3384777828423575, + "learning_rate": 5.549501537479511e-06, + "loss": 0.24919739365577698, + "step": 2691 + }, + { + "epoch": 1.3301618682812306, + "grad_norm": 1.430948519009228, + "learning_rate": 5.542186211259737e-06, + "loss": 0.25435787439346313, + "step": 2692 + }, + { + "epoch": 1.3306561225750648, + "grad_norm": 1.2533415908145504, + "learning_rate": 5.534873861012763e-06, + "loss": 0.2502862811088562, + "step": 2693 + }, + { + "epoch": 1.331150376868899, + "grad_norm": 1.5771700033159861, + "learning_rate": 5.527564491620195e-06, + "loss": 0.25752580165863037, + "step": 2694 + }, + { + "epoch": 1.3316446311627332, + "grad_norm": 1.332305251527839, + "learning_rate": 5.520258107961671e-06, + "loss": 0.22301846742630005, + "step": 2695 + }, + { + "epoch": 1.3321388854565674, + "grad_norm": 1.4890781870784164, + "learning_rate": 5.512954714914825e-06, + "loss": 0.24581964313983917, + "step": 2696 + }, + { + "epoch": 1.3326331397504017, + "grad_norm": 1.3113609641171107, + "learning_rate": 5.5056543173553e-06, + "loss": 0.271970272064209, + "step": 2697 + }, + { + "epoch": 1.3331273940442356, + "grad_norm": 1.2432947451070444, + "learning_rate": 5.498356920156735e-06, + "loss": 0.23041053116321564, + "step": 2698 + }, + { + "epoch": 1.3336216483380698, + "grad_norm": 1.3239879393507852, + "learning_rate": 5.491062528190775e-06, + "loss": 0.2338491678237915, + "step": 2699 + }, + { + "epoch": 1.334115902631904, + "grad_norm": 1.3971989589857847, + "learning_rate": 5.483771146327037e-06, + "loss": 0.2667239010334015, + "step": 2700 + }, + { + "epoch": 1.3346101569257383, + "grad_norm": 1.1737606299055239, + "learning_rate": 
5.4764827794331586e-06, + "loss": 0.24761441349983215, + "step": 2701 + }, + { + "epoch": 1.3351044112195725, + "grad_norm": 1.2384835240862428, + "learning_rate": 5.469197432374747e-06, + "loss": 0.24087639153003693, + "step": 2702 + }, + { + "epoch": 1.3355986655134067, + "grad_norm": 1.3287799587341789, + "learning_rate": 5.461915110015386e-06, + "loss": 0.26774898171424866, + "step": 2703 + }, + { + "epoch": 1.3360929198072409, + "grad_norm": 1.432719946516567, + "learning_rate": 5.454635817216658e-06, + "loss": 0.2820417284965515, + "step": 2704 + }, + { + "epoch": 1.336587174101075, + "grad_norm": 1.408646831955897, + "learning_rate": 5.447359558838113e-06, + "loss": 0.2891086935997009, + "step": 2705 + }, + { + "epoch": 1.3370814283949093, + "grad_norm": 1.370327694474157, + "learning_rate": 5.440086339737277e-06, + "loss": 0.24551361799240112, + "step": 2706 + }, + { + "epoch": 1.3375756826887435, + "grad_norm": 1.3889596017030068, + "learning_rate": 5.432816164769648e-06, + "loss": 0.2293522208929062, + "step": 2707 + }, + { + "epoch": 1.3380699369825775, + "grad_norm": 1.255610549812546, + "learning_rate": 5.425549038788693e-06, + "loss": 0.22325105965137482, + "step": 2708 + }, + { + "epoch": 1.3385641912764117, + "grad_norm": 1.3152207031427636, + "learning_rate": 5.4182849666458315e-06, + "loss": 0.2263861447572708, + "step": 2709 + }, + { + "epoch": 1.339058445570246, + "grad_norm": 1.2663328789435477, + "learning_rate": 5.411023953190466e-06, + "loss": 0.26902303099632263, + "step": 2710 + }, + { + "epoch": 1.33955269986408, + "grad_norm": 1.4136099878472004, + "learning_rate": 5.403766003269944e-06, + "loss": 0.26154825091362, + "step": 2711 + }, + { + "epoch": 1.3400469541579143, + "grad_norm": 1.32960722740892, + "learning_rate": 5.396511121729562e-06, + "loss": 0.2878270745277405, + "step": 2712 + }, + { + "epoch": 1.3405412084517483, + "grad_norm": 1.3136699200223048, + "learning_rate": 5.389259313412581e-06, + "loss": 0.26206687092781067, + 
"step": 2713 + }, + { + "epoch": 1.3410354627455825, + "grad_norm": 1.4998302342686003, + "learning_rate": 5.382010583160201e-06, + "loss": 0.25612518191337585, + "step": 2714 + }, + { + "epoch": 1.3415297170394167, + "grad_norm": 1.2688327982594605, + "learning_rate": 5.374764935811574e-06, + "loss": 0.25600868463516235, + "step": 2715 + }, + { + "epoch": 1.342023971333251, + "grad_norm": 1.274882827976935, + "learning_rate": 5.367522376203787e-06, + "loss": 0.24837616086006165, + "step": 2716 + }, + { + "epoch": 1.3425182256270851, + "grad_norm": 1.2814047275641038, + "learning_rate": 5.360282909171875e-06, + "loss": 0.23487885296344757, + "step": 2717 + }, + { + "epoch": 1.3430124799209193, + "grad_norm": 1.2024219184737237, + "learning_rate": 5.353046539548797e-06, + "loss": 0.22786842286586761, + "step": 2718 + }, + { + "epoch": 1.3435067342147535, + "grad_norm": 1.288373437821988, + "learning_rate": 5.3458132721654564e-06, + "loss": 0.2198137640953064, + "step": 2719 + }, + { + "epoch": 1.3440009885085877, + "grad_norm": 1.157338464361865, + "learning_rate": 5.338583111850671e-06, + "loss": 0.20056495070457458, + "step": 2720 + }, + { + "epoch": 1.344495242802422, + "grad_norm": 1.2341328448147324, + "learning_rate": 5.331356063431195e-06, + "loss": 0.21636295318603516, + "step": 2721 + }, + { + "epoch": 1.3449894970962561, + "grad_norm": 1.2390666617057948, + "learning_rate": 5.32413213173171e-06, + "loss": 0.23933230340480804, + "step": 2722 + }, + { + "epoch": 1.3454837513900901, + "grad_norm": 1.3024836233276083, + "learning_rate": 5.316911321574799e-06, + "loss": 0.2402106523513794, + "step": 2723 + }, + { + "epoch": 1.3459780056839243, + "grad_norm": 1.252933113923405, + "learning_rate": 5.309693637780979e-06, + "loss": 0.22524669766426086, + "step": 2724 + }, + { + "epoch": 1.3464722599777585, + "grad_norm": 1.3140972939485838, + "learning_rate": 5.302479085168668e-06, + "loss": 0.25381600856781006, + "step": 2725 + }, + { + "epoch": 
1.3469665142715928, + "grad_norm": 1.2857997911307526, + "learning_rate": 5.295267668554202e-06, + "loss": 0.2614738643169403, + "step": 2726 + }, + { + "epoch": 1.347460768565427, + "grad_norm": 8.575818718402259, + "learning_rate": 5.288059392751817e-06, + "loss": 0.2701472043991089, + "step": 2727 + }, + { + "epoch": 1.3479550228592612, + "grad_norm": 1.378318405059408, + "learning_rate": 5.280854262573661e-06, + "loss": 0.2788996696472168, + "step": 2728 + }, + { + "epoch": 1.3484492771530951, + "grad_norm": 1.2759693341337726, + "learning_rate": 5.273652282829764e-06, + "loss": 0.2419927418231964, + "step": 2729 + }, + { + "epoch": 1.3489435314469294, + "grad_norm": 1.4943656047554885, + "learning_rate": 5.266453458328071e-06, + "loss": 0.26454097032546997, + "step": 2730 + }, + { + "epoch": 1.3494377857407636, + "grad_norm": 1.3109211241308218, + "learning_rate": 5.259257793874421e-06, + "loss": 0.24090510606765747, + "step": 2731 + }, + { + "epoch": 1.3499320400345978, + "grad_norm": 1.3390086912520884, + "learning_rate": 5.252065294272528e-06, + "loss": 0.27343428134918213, + "step": 2732 + }, + { + "epoch": 1.350426294328432, + "grad_norm": 1.3272957509132868, + "learning_rate": 5.244875964324005e-06, + "loss": 0.2623448967933655, + "step": 2733 + }, + { + "epoch": 1.3509205486222662, + "grad_norm": 1.2273005978142049, + "learning_rate": 5.237689808828346e-06, + "loss": 0.22721052169799805, + "step": 2734 + }, + { + "epoch": 1.3514148029161004, + "grad_norm": 1.4111267721919942, + "learning_rate": 5.230506832582924e-06, + "loss": 0.26385387778282166, + "step": 2735 + }, + { + "epoch": 1.3519090572099346, + "grad_norm": 1.4309565613654673, + "learning_rate": 5.223327040382995e-06, + "loss": 0.2679533064365387, + "step": 2736 + }, + { + "epoch": 1.3524033115037688, + "grad_norm": 1.285385576934023, + "learning_rate": 5.2161504370216855e-06, + "loss": 0.25042447447776794, + "step": 2737 + }, + { + "epoch": 1.3528975657976028, + "grad_norm": 
1.3420398780717075, + "learning_rate": 5.2089770272899845e-06, + "loss": 0.22735297679901123, + "step": 2738 + }, + { + "epoch": 1.353391820091437, + "grad_norm": 1.2715261749804811, + "learning_rate": 5.201806815976772e-06, + "loss": 0.25517284870147705, + "step": 2739 + }, + { + "epoch": 1.3538860743852712, + "grad_norm": 1.4834789867138143, + "learning_rate": 5.194639807868767e-06, + "loss": 0.2942652702331543, + "step": 2740 + }, + { + "epoch": 1.3543803286791054, + "grad_norm": 1.2535180106339032, + "learning_rate": 5.187476007750567e-06, + "loss": 0.2605661153793335, + "step": 2741 + }, + { + "epoch": 1.3548745829729396, + "grad_norm": 1.34702814682356, + "learning_rate": 5.1803154204046215e-06, + "loss": 0.22976648807525635, + "step": 2742 + }, + { + "epoch": 1.3553688372667738, + "grad_norm": 1.2786328684416228, + "learning_rate": 5.173158050611236e-06, + "loss": 0.24301470816135406, + "step": 2743 + }, + { + "epoch": 1.3558630915606078, + "grad_norm": 1.3509518199555386, + "learning_rate": 5.166003903148568e-06, + "loss": 0.2714199125766754, + "step": 2744 + }, + { + "epoch": 1.356357345854442, + "grad_norm": 1.4130809131188478, + "learning_rate": 5.15885298279263e-06, + "loss": 0.27004045248031616, + "step": 2745 + }, + { + "epoch": 1.3568516001482762, + "grad_norm": 1.1866112739948385, + "learning_rate": 5.151705294317262e-06, + "loss": 0.2062053680419922, + "step": 2746 + }, + { + "epoch": 1.3573458544421104, + "grad_norm": 1.3476275860643891, + "learning_rate": 5.144560842494168e-06, + "loss": 0.2589803636074066, + "step": 2747 + }, + { + "epoch": 1.3578401087359446, + "grad_norm": 1.4207662826517113, + "learning_rate": 5.137419632092886e-06, + "loss": 0.26469242572784424, + "step": 2748 + }, + { + "epoch": 1.3583343630297788, + "grad_norm": 1.217607994018294, + "learning_rate": 5.130281667880774e-06, + "loss": 0.26241326332092285, + "step": 2749 + }, + { + "epoch": 1.358828617323613, + "grad_norm": 1.375829317891462, + "learning_rate": 
5.123146954623038e-06, + "loss": 0.2674810290336609, + "step": 2750 + }, + { + "epoch": 1.3593228716174472, + "grad_norm": 1.3872924823998294, + "learning_rate": 5.116015497082719e-06, + "loss": 0.23186063766479492, + "step": 2751 + }, + { + "epoch": 1.3598171259112815, + "grad_norm": 1.3207469475464653, + "learning_rate": 5.108887300020669e-06, + "loss": 0.2794165313243866, + "step": 2752 + }, + { + "epoch": 1.3603113802051157, + "grad_norm": 1.2682065300683938, + "learning_rate": 5.1017623681955705e-06, + "loss": 0.25263023376464844, + "step": 2753 + }, + { + "epoch": 1.3608056344989496, + "grad_norm": 1.385223404499901, + "learning_rate": 5.0946407063639315e-06, + "loss": 0.2503500282764435, + "step": 2754 + }, + { + "epoch": 1.3612998887927839, + "grad_norm": 1.1490078969357793, + "learning_rate": 5.087522319280061e-06, + "loss": 0.21871569752693176, + "step": 2755 + }, + { + "epoch": 1.361794143086618, + "grad_norm": 1.3919853358310244, + "learning_rate": 5.080407211696103e-06, + "loss": 0.2790142893791199, + "step": 2756 + }, + { + "epoch": 1.3622883973804523, + "grad_norm": 1.3837841689522787, + "learning_rate": 5.073295388362003e-06, + "loss": 0.27197304368019104, + "step": 2757 + }, + { + "epoch": 1.3627826516742865, + "grad_norm": 1.3248855835987599, + "learning_rate": 5.066186854025502e-06, + "loss": 0.2402152568101883, + "step": 2758 + }, + { + "epoch": 1.3632769059681207, + "grad_norm": 1.3193984824612894, + "learning_rate": 5.059081613432162e-06, + "loss": 0.24418887495994568, + "step": 2759 + }, + { + "epoch": 1.3637711602619547, + "grad_norm": 1.1840901033348532, + "learning_rate": 5.05197967132534e-06, + "loss": 0.2239491045475006, + "step": 2760 + }, + { + "epoch": 1.3642654145557889, + "grad_norm": 1.3401183348354848, + "learning_rate": 5.044881032446192e-06, + "loss": 0.25177091360092163, + "step": 2761 + }, + { + "epoch": 1.364759668849623, + "grad_norm": 1.2524679914953787, + "learning_rate": 5.0377857015336655e-06, + "loss": 
0.25462138652801514, + "step": 2762 + }, + { + "epoch": 1.3652539231434573, + "grad_norm": 1.154660335850044, + "learning_rate": 5.0306936833245034e-06, + "loss": 0.21030092239379883, + "step": 2763 + }, + { + "epoch": 1.3657481774372915, + "grad_norm": 1.2778480955324765, + "learning_rate": 5.0236049825532355e-06, + "loss": 0.24033348262310028, + "step": 2764 + }, + { + "epoch": 1.3662424317311257, + "grad_norm": 1.2874693424331807, + "learning_rate": 5.016519603952177e-06, + "loss": 0.20803815126419067, + "step": 2765 + }, + { + "epoch": 1.36673668602496, + "grad_norm": 1.3360777408248645, + "learning_rate": 5.00943755225143e-06, + "loss": 0.21589599549770355, + "step": 2766 + }, + { + "epoch": 1.367230940318794, + "grad_norm": 1.3112690340132882, + "learning_rate": 5.00235883217886e-06, + "loss": 0.2690975069999695, + "step": 2767 + }, + { + "epoch": 1.3677251946126283, + "grad_norm": 1.395793399890879, + "learning_rate": 4.995283448460131e-06, + "loss": 0.2368423044681549, + "step": 2768 + }, + { + "epoch": 1.3682194489064623, + "grad_norm": 1.428306560095472, + "learning_rate": 4.988211405818661e-06, + "loss": 0.2801262140274048, + "step": 2769 + }, + { + "epoch": 1.3687137032002965, + "grad_norm": 1.4209027545437471, + "learning_rate": 4.981142708975647e-06, + "loss": 0.2777586877346039, + "step": 2770 + }, + { + "epoch": 1.3692079574941307, + "grad_norm": 1.1921679323806382, + "learning_rate": 4.97407736265005e-06, + "loss": 0.2400980144739151, + "step": 2771 + }, + { + "epoch": 1.369702211787965, + "grad_norm": 1.233538906022963, + "learning_rate": 4.967015371558592e-06, + "loss": 0.2513861358165741, + "step": 2772 + }, + { + "epoch": 1.3701964660817991, + "grad_norm": 1.2944813845771217, + "learning_rate": 4.959956740415761e-06, + "loss": 0.2785816490650177, + "step": 2773 + }, + { + "epoch": 1.3706907203756333, + "grad_norm": 1.456856079389265, + "learning_rate": 4.9529014739338e-06, + "loss": 0.29092347621917725, + "step": 2774 + }, + { + "epoch": 
1.3711849746694673, + "grad_norm": 1.3133832748237033, + "learning_rate": 4.945849576822693e-06, + "loss": 0.27067384123802185, + "step": 2775 + }, + { + "epoch": 1.3716792289633015, + "grad_norm": 1.3000530351478699, + "learning_rate": 4.938801053790199e-06, + "loss": 0.21500205993652344, + "step": 2776 + }, + { + "epoch": 1.3721734832571357, + "grad_norm": 1.2838621226635265, + "learning_rate": 4.931755909541808e-06, + "loss": 0.2422936111688614, + "step": 2777 + }, + { + "epoch": 1.37266773755097, + "grad_norm": 1.3694112071584477, + "learning_rate": 4.9247141487807515e-06, + "loss": 0.2760060727596283, + "step": 2778 + }, + { + "epoch": 1.3731619918448041, + "grad_norm": 1.39746625445185, + "learning_rate": 4.917675776208013e-06, + "loss": 0.22626326978206635, + "step": 2779 + }, + { + "epoch": 1.3736562461386383, + "grad_norm": 1.34096746485375, + "learning_rate": 4.910640796522308e-06, + "loss": 0.23023411631584167, + "step": 2780 + }, + { + "epoch": 1.3741505004324726, + "grad_norm": 1.29137003736815, + "learning_rate": 4.903609214420088e-06, + "loss": 0.22157053649425507, + "step": 2781 + }, + { + "epoch": 1.3746447547263068, + "grad_norm": 1.1801851543310786, + "learning_rate": 4.89658103459554e-06, + "loss": 0.24125584959983826, + "step": 2782 + }, + { + "epoch": 1.375139009020141, + "grad_norm": 1.3517508821088553, + "learning_rate": 4.889556261740578e-06, + "loss": 0.26294079422950745, + "step": 2783 + }, + { + "epoch": 1.3756332633139752, + "grad_norm": 1.2726719724151299, + "learning_rate": 4.882534900544829e-06, + "loss": 0.25327497720718384, + "step": 2784 + }, + { + "epoch": 1.3761275176078092, + "grad_norm": 1.2868199846308948, + "learning_rate": 4.875516955695663e-06, + "loss": 0.2716723084449768, + "step": 2785 + }, + { + "epoch": 1.3766217719016434, + "grad_norm": 1.4619117882899046, + "learning_rate": 4.8685024318781615e-06, + "loss": 0.2889532446861267, + "step": 2786 + }, + { + "epoch": 1.3771160261954776, + "grad_norm": 1.2622088454697893, 
+ "learning_rate": 4.861491333775114e-06, + "loss": 0.23743030428886414, + "step": 2787 + }, + { + "epoch": 1.3776102804893118, + "grad_norm": 1.2912517641324606, + "learning_rate": 4.8544836660670305e-06, + "loss": 0.27180567383766174, + "step": 2788 + }, + { + "epoch": 1.378104534783146, + "grad_norm": 1.3376004646586275, + "learning_rate": 4.847479433432131e-06, + "loss": 0.2549944221973419, + "step": 2789 + }, + { + "epoch": 1.37859878907698, + "grad_norm": 1.189305404121555, + "learning_rate": 4.8404786405463414e-06, + "loss": 0.24112319946289062, + "step": 2790 + }, + { + "epoch": 1.3790930433708142, + "grad_norm": 1.1833978049698726, + "learning_rate": 4.833481292083291e-06, + "loss": 0.22865869104862213, + "step": 2791 + }, + { + "epoch": 1.3795872976646484, + "grad_norm": 1.268697923498799, + "learning_rate": 4.82648739271431e-06, + "loss": 0.24851003289222717, + "step": 2792 + }, + { + "epoch": 1.3800815519584826, + "grad_norm": 1.2931223721765053, + "learning_rate": 4.819496947108424e-06, + "loss": 0.251456081867218, + "step": 2793 + }, + { + "epoch": 1.3805758062523168, + "grad_norm": 1.4758961733623657, + "learning_rate": 4.81250995993236e-06, + "loss": 0.31711041927337646, + "step": 2794 + }, + { + "epoch": 1.381070060546151, + "grad_norm": 1.3291779254725478, + "learning_rate": 4.805526435850523e-06, + "loss": 0.2204340100288391, + "step": 2795 + }, + { + "epoch": 1.3815643148399852, + "grad_norm": 1.2784619373678463, + "learning_rate": 4.798546379525013e-06, + "loss": 0.26289406418800354, + "step": 2796 + }, + { + "epoch": 1.3820585691338194, + "grad_norm": 1.28320111492484, + "learning_rate": 4.7915697956156284e-06, + "loss": 0.24830611050128937, + "step": 2797 + }, + { + "epoch": 1.3825528234276536, + "grad_norm": 1.2879657785107324, + "learning_rate": 4.784596688779825e-06, + "loss": 0.24792183935642242, + "step": 2798 + }, + { + "epoch": 1.3830470777214878, + "grad_norm": 1.2696074389245717, + "learning_rate": 4.777627063672753e-06, + "loss": 
0.2689560651779175, + "step": 2799 + }, + { + "epoch": 1.3835413320153218, + "grad_norm": 1.3225545388421776, + "learning_rate": 4.770660924947238e-06, + "loss": 0.24323254823684692, + "step": 2800 + }, + { + "epoch": 1.384035586309156, + "grad_norm": 1.4076671335254063, + "learning_rate": 4.7636982772537645e-06, + "loss": 0.24404528737068176, + "step": 2801 + }, + { + "epoch": 1.3845298406029902, + "grad_norm": 1.203765816908177, + "learning_rate": 4.7567391252405075e-06, + "loss": 0.23512448370456696, + "step": 2802 + }, + { + "epoch": 1.3850240948968244, + "grad_norm": 1.5018331188451308, + "learning_rate": 4.749783473553297e-06, + "loss": 0.26446110010147095, + "step": 2803 + }, + { + "epoch": 1.3855183491906586, + "grad_norm": 1.408580468005289, + "learning_rate": 4.742831326835618e-06, + "loss": 0.24630968272686005, + "step": 2804 + }, + { + "epoch": 1.3860126034844928, + "grad_norm": 1.3358261514200123, + "learning_rate": 4.735882689728628e-06, + "loss": 0.253492146730423, + "step": 2805 + }, + { + "epoch": 1.3865068577783268, + "grad_norm": 1.3501776737603972, + "learning_rate": 4.7289375668711444e-06, + "loss": 0.271090567111969, + "step": 2806 + }, + { + "epoch": 1.387001112072161, + "grad_norm": 1.278147407656648, + "learning_rate": 4.721995962899625e-06, + "loss": 0.24045832455158234, + "step": 2807 + }, + { + "epoch": 1.3874953663659952, + "grad_norm": 1.3482420589650876, + "learning_rate": 4.715057882448187e-06, + "loss": 0.2525935471057892, + "step": 2808 + }, + { + "epoch": 1.3879896206598294, + "grad_norm": 1.6416013674407632, + "learning_rate": 4.708123330148593e-06, + "loss": 0.30852392315864563, + "step": 2809 + }, + { + "epoch": 1.3884838749536637, + "grad_norm": 1.4379358472073636, + "learning_rate": 4.701192310630253e-06, + "loss": 0.2770250737667084, + "step": 2810 + }, + { + "epoch": 1.3889781292474979, + "grad_norm": 1.3872314722590495, + "learning_rate": 4.6942648285202154e-06, + "loss": 0.29135680198669434, + "step": 2811 + }, + { + 
"epoch": 1.389472383541332, + "grad_norm": 1.3561535153102244, + "learning_rate": 4.687340888443171e-06, + "loss": 0.26933860778808594, + "step": 2812 + }, + { + "epoch": 1.3899666378351663, + "grad_norm": 1.3589820356083573, + "learning_rate": 4.680420495021436e-06, + "loss": 0.26089105010032654, + "step": 2813 + }, + { + "epoch": 1.3904608921290005, + "grad_norm": 1.446680212777315, + "learning_rate": 4.673503652874977e-06, + "loss": 0.26031410694122314, + "step": 2814 + }, + { + "epoch": 1.3909551464228345, + "grad_norm": 1.4223445911905375, + "learning_rate": 4.6665903666213685e-06, + "loss": 0.2887076139450073, + "step": 2815 + }, + { + "epoch": 1.3914494007166687, + "grad_norm": 1.4125652827001185, + "learning_rate": 4.6596806408758275e-06, + "loss": 0.2360706925392151, + "step": 2816 + }, + { + "epoch": 1.3919436550105029, + "grad_norm": 1.2857689419175287, + "learning_rate": 4.652774480251186e-06, + "loss": 0.22275522351264954, + "step": 2817 + }, + { + "epoch": 1.392437909304337, + "grad_norm": 1.4433288432295395, + "learning_rate": 4.645871889357899e-06, + "loss": 0.2425977736711502, + "step": 2818 + }, + { + "epoch": 1.3929321635981713, + "grad_norm": 1.3257241152583827, + "learning_rate": 4.638972872804038e-06, + "loss": 0.25219830870628357, + "step": 2819 + }, + { + "epoch": 1.3934264178920055, + "grad_norm": 1.3749035761313395, + "learning_rate": 4.6320774351952916e-06, + "loss": 0.28060346841812134, + "step": 2820 + }, + { + "epoch": 1.3939206721858395, + "grad_norm": 1.2003147708990263, + "learning_rate": 4.625185581134942e-06, + "loss": 0.2395240217447281, + "step": 2821 + }, + { + "epoch": 1.3944149264796737, + "grad_norm": 1.1704641579429333, + "learning_rate": 4.618297315223906e-06, + "loss": 0.23622646927833557, + "step": 2822 + }, + { + "epoch": 1.394909180773508, + "grad_norm": 1.2829625624138312, + "learning_rate": 4.611412642060692e-06, + "loss": 0.2189474105834961, + "step": 2823 + }, + { + "epoch": 1.395403435067342, + "grad_norm": 
1.433264639271618, + "learning_rate": 4.6045315662414e-06, + "loss": 0.266002357006073, + "step": 2824 + }, + { + "epoch": 1.3958976893611763, + "grad_norm": 1.3252437693414834, + "learning_rate": 4.5976540923597425e-06, + "loss": 0.2402176856994629, + "step": 2825 + }, + { + "epoch": 1.3963919436550105, + "grad_norm": 1.359969321526994, + "learning_rate": 4.5907802250070235e-06, + "loss": 0.2493474781513214, + "step": 2826 + }, + { + "epoch": 1.3968861979488447, + "grad_norm": 1.41117190363675, + "learning_rate": 4.583909968772137e-06, + "loss": 0.25716543197631836, + "step": 2827 + }, + { + "epoch": 1.397380452242679, + "grad_norm": 1.2726969842984424, + "learning_rate": 4.57704332824157e-06, + "loss": 0.29470473527908325, + "step": 2828 + }, + { + "epoch": 1.3978747065365131, + "grad_norm": 1.3349562969336177, + "learning_rate": 4.570180307999394e-06, + "loss": 0.28095656633377075, + "step": 2829 + }, + { + "epoch": 1.3983689608303473, + "grad_norm": 1.3296802970374444, + "learning_rate": 4.563320912627256e-06, + "loss": 0.2351825088262558, + "step": 2830 + }, + { + "epoch": 1.3988632151241813, + "grad_norm": 1.378245480597285, + "learning_rate": 4.556465146704399e-06, + "loss": 0.25859856605529785, + "step": 2831 + }, + { + "epoch": 1.3993574694180155, + "grad_norm": 1.3122509634402246, + "learning_rate": 4.549613014807637e-06, + "loss": 0.2503181993961334, + "step": 2832 + }, + { + "epoch": 1.3998517237118497, + "grad_norm": 1.4164889794081637, + "learning_rate": 4.542764521511345e-06, + "loss": 0.26368820667266846, + "step": 2833 + }, + { + "epoch": 1.400345978005684, + "grad_norm": 1.2584462742908673, + "learning_rate": 4.535919671387483e-06, + "loss": 0.24077676236629486, + "step": 2834 + }, + { + "epoch": 1.4008402322995182, + "grad_norm": 1.3906309875331755, + "learning_rate": 4.529078469005577e-06, + "loss": 0.27042093873023987, + "step": 2835 + }, + { + "epoch": 1.4013344865933524, + "grad_norm": 1.3047899471845867, + "learning_rate": 
4.5222409189327155e-06, + "loss": 0.2731306552886963, + "step": 2836 + }, + { + "epoch": 1.4018287408871863, + "grad_norm": 1.293016022457822, + "learning_rate": 4.515407025733548e-06, + "loss": 0.2925037741661072, + "step": 2837 + }, + { + "epoch": 1.4023229951810205, + "grad_norm": 1.3019226114538747, + "learning_rate": 4.508576793970285e-06, + "loss": 0.2927025556564331, + "step": 2838 + }, + { + "epoch": 1.4028172494748548, + "grad_norm": 1.2637397509173496, + "learning_rate": 4.5017502282026926e-06, + "loss": 0.26285338401794434, + "step": 2839 + }, + { + "epoch": 1.403311503768689, + "grad_norm": 1.3147900807622677, + "learning_rate": 4.49492733298809e-06, + "loss": 0.22698873281478882, + "step": 2840 + }, + { + "epoch": 1.4038057580625232, + "grad_norm": 1.3171706155487821, + "learning_rate": 4.488108112881339e-06, + "loss": 0.24116170406341553, + "step": 2841 + }, + { + "epoch": 1.4043000123563574, + "grad_norm": 1.57472275672956, + "learning_rate": 4.481292572434852e-06, + "loss": 0.3211704194545746, + "step": 2842 + }, + { + "epoch": 1.4047942666501916, + "grad_norm": 1.3631722904804857, + "learning_rate": 4.474480716198598e-06, + "loss": 0.26634523272514343, + "step": 2843 + }, + { + "epoch": 1.4052885209440258, + "grad_norm": 1.2801660794508798, + "learning_rate": 4.467672548720066e-06, + "loss": 0.24751242995262146, + "step": 2844 + }, + { + "epoch": 1.40578277523786, + "grad_norm": 1.2023997182117507, + "learning_rate": 4.4608680745442915e-06, + "loss": 0.22031354904174805, + "step": 2845 + }, + { + "epoch": 1.406277029531694, + "grad_norm": 1.4549549871552898, + "learning_rate": 4.454067298213847e-06, + "loss": 0.2474634051322937, + "step": 2846 + }, + { + "epoch": 1.4067712838255282, + "grad_norm": 1.2925543429398942, + "learning_rate": 4.4472702242688315e-06, + "loss": 0.2494845986366272, + "step": 2847 + }, + { + "epoch": 1.4072655381193624, + "grad_norm": 1.246615378915442, + "learning_rate": 4.440476857246876e-06, + "loss": 0.23150494694709778, 
+ "step": 2848 + }, + { + "epoch": 1.4077597924131966, + "grad_norm": 1.3473585855048795, + "learning_rate": 4.433687201683138e-06, + "loss": 0.2093413770198822, + "step": 2849 + }, + { + "epoch": 1.4082540467070308, + "grad_norm": 1.4247715723132508, + "learning_rate": 4.426901262110287e-06, + "loss": 0.26741865277290344, + "step": 2850 + }, + { + "epoch": 1.408748301000865, + "grad_norm": 1.3965732526570211, + "learning_rate": 4.420119043058521e-06, + "loss": 0.2599044740200043, + "step": 2851 + }, + { + "epoch": 1.409242555294699, + "grad_norm": 1.37695062225065, + "learning_rate": 4.413340549055562e-06, + "loss": 0.26934683322906494, + "step": 2852 + }, + { + "epoch": 1.4097368095885332, + "grad_norm": 1.247550824996485, + "learning_rate": 4.4065657846266255e-06, + "loss": 0.2609720528125763, + "step": 2853 + }, + { + "epoch": 1.4102310638823674, + "grad_norm": 1.3034094501092508, + "learning_rate": 4.39979475429445e-06, + "loss": 0.23431813716888428, + "step": 2854 + }, + { + "epoch": 1.4107253181762016, + "grad_norm": 1.5127417165274348, + "learning_rate": 4.39302746257928e-06, + "loss": 0.2791878581047058, + "step": 2855 + }, + { + "epoch": 1.4112195724700358, + "grad_norm": 1.445393105302077, + "learning_rate": 4.386263913998862e-06, + "loss": 0.30482247471809387, + "step": 2856 + }, + { + "epoch": 1.41171382676387, + "grad_norm": 1.517774336378155, + "learning_rate": 4.379504113068445e-06, + "loss": 0.24561305344104767, + "step": 2857 + }, + { + "epoch": 1.4122080810577042, + "grad_norm": 1.2686201180133903, + "learning_rate": 4.372748064300777e-06, + "loss": 0.23973286151885986, + "step": 2858 + }, + { + "epoch": 1.4127023353515384, + "grad_norm": 1.2884315615066577, + "learning_rate": 4.365995772206092e-06, + "loss": 0.26788556575775146, + "step": 2859 + }, + { + "epoch": 1.4131965896453726, + "grad_norm": 1.2479985472864645, + "learning_rate": 4.359247241292136e-06, + "loss": 0.22432288527488708, + "step": 2860 + }, + { + "epoch": 1.4136908439392069, + 
"grad_norm": 1.4071442664764462, + "learning_rate": 4.352502476064121e-06, + "loss": 0.282687783241272, + "step": 2861 + }, + { + "epoch": 1.4141850982330408, + "grad_norm": 1.350175603929749, + "learning_rate": 4.345761481024761e-06, + "loss": 0.2516692578792572, + "step": 2862 + }, + { + "epoch": 1.414679352526875, + "grad_norm": 1.3813903906983658, + "learning_rate": 4.3390242606742465e-06, + "loss": 0.2473583221435547, + "step": 2863 + }, + { + "epoch": 1.4151736068207093, + "grad_norm": 1.365125849897862, + "learning_rate": 4.33229081951025e-06, + "loss": 0.24372908473014832, + "step": 2864 + }, + { + "epoch": 1.4156678611145435, + "grad_norm": 1.935117633937839, + "learning_rate": 4.325561162027922e-06, + "loss": 0.2877897024154663, + "step": 2865 + }, + { + "epoch": 1.4161621154083777, + "grad_norm": 1.3789670558806315, + "learning_rate": 4.318835292719886e-06, + "loss": 0.2554720342159271, + "step": 2866 + }, + { + "epoch": 1.4166563697022119, + "grad_norm": 1.400243578908533, + "learning_rate": 4.312113216076228e-06, + "loss": 0.26695260405540466, + "step": 2867 + }, + { + "epoch": 1.4171506239960459, + "grad_norm": 1.310264039945657, + "learning_rate": 4.305394936584522e-06, + "loss": 0.26983851194381714, + "step": 2868 + }, + { + "epoch": 1.41764487828988, + "grad_norm": 1.4664847959785403, + "learning_rate": 4.298680458729793e-06, + "loss": 0.303170382976532, + "step": 2869 + }, + { + "epoch": 1.4181391325837143, + "grad_norm": 1.2870012899484584, + "learning_rate": 4.2919697869945234e-06, + "loss": 0.23217584192752838, + "step": 2870 + }, + { + "epoch": 1.4186333868775485, + "grad_norm": 1.3723703910904035, + "learning_rate": 4.285262925858663e-06, + "loss": 0.2895517349243164, + "step": 2871 + }, + { + "epoch": 1.4191276411713827, + "grad_norm": 1.3083324921698822, + "learning_rate": 4.278559879799628e-06, + "loss": 0.24025630950927734, + "step": 2872 + }, + { + "epoch": 1.4196218954652169, + "grad_norm": 1.2827271091784578, + "learning_rate": 
4.271860653292263e-06, + "loss": 0.22810839116573334, + "step": 2873 + }, + { + "epoch": 1.420116149759051, + "grad_norm": 1.3806208017840322, + "learning_rate": 4.26516525080888e-06, + "loss": 0.266724169254303, + "step": 2874 + }, + { + "epoch": 1.4206104040528853, + "grad_norm": 1.225057219675358, + "learning_rate": 4.25847367681924e-06, + "loss": 0.22618745267391205, + "step": 2875 + }, + { + "epoch": 1.4211046583467195, + "grad_norm": 1.2369737958102245, + "learning_rate": 4.251785935790529e-06, + "loss": 0.2239789217710495, + "step": 2876 + }, + { + "epoch": 1.4215989126405535, + "grad_norm": 1.4266723106614325, + "learning_rate": 4.245102032187399e-06, + "loss": 0.21519358456134796, + "step": 2877 + }, + { + "epoch": 1.4220931669343877, + "grad_norm": 1.3543349519259755, + "learning_rate": 4.2384219704719284e-06, + "loss": 0.31226712465286255, + "step": 2878 + }, + { + "epoch": 1.422587421228222, + "grad_norm": 1.56763311196269, + "learning_rate": 4.231745755103625e-06, + "loss": 0.26814836263656616, + "step": 2879 + }, + { + "epoch": 1.423081675522056, + "grad_norm": 1.340943129837897, + "learning_rate": 4.225073390539436e-06, + "loss": 0.2369621843099594, + "step": 2880 + }, + { + "epoch": 1.4235759298158903, + "grad_norm": 1.4174455321042607, + "learning_rate": 4.218404881233737e-06, + "loss": 0.2556746304035187, + "step": 2881 + }, + { + "epoch": 1.4240701841097245, + "grad_norm": 1.4008574237374047, + "learning_rate": 4.2117402316383314e-06, + "loss": 0.25875598192214966, + "step": 2882 + }, + { + "epoch": 1.4245644384035585, + "grad_norm": 1.3837412182941131, + "learning_rate": 4.205079446202443e-06, + "loss": 0.26839762926101685, + "step": 2883 + }, + { + "epoch": 1.4250586926973927, + "grad_norm": 1.3404796422391116, + "learning_rate": 4.198422529372717e-06, + "loss": 0.2764383554458618, + "step": 2884 + }, + { + "epoch": 1.425552946991227, + "grad_norm": 1.6233600341280843, + "learning_rate": 4.191769485593216e-06, + "loss": 0.24517112970352173, + 
"step": 2885 + }, + { + "epoch": 1.4260472012850611, + "grad_norm": 1.2960278491651354, + "learning_rate": 4.18512031930542e-06, + "loss": 0.21880990266799927, + "step": 2886 + }, + { + "epoch": 1.4265414555788953, + "grad_norm": 1.25547495232964, + "learning_rate": 4.178475034948212e-06, + "loss": 0.24671246111392975, + "step": 2887 + }, + { + "epoch": 1.4270357098727295, + "grad_norm": 1.3321806455697769, + "learning_rate": 4.171833636957886e-06, + "loss": 0.25473371148109436, + "step": 2888 + }, + { + "epoch": 1.4275299641665637, + "grad_norm": 1.2832708163920512, + "learning_rate": 4.1651961297681574e-06, + "loss": 0.2675618529319763, + "step": 2889 + }, + { + "epoch": 1.428024218460398, + "grad_norm": 1.361777795281808, + "learning_rate": 4.15856251781012e-06, + "loss": 0.24357986450195312, + "step": 2890 + }, + { + "epoch": 1.4285184727542322, + "grad_norm": 1.360475333723739, + "learning_rate": 4.1519328055122825e-06, + "loss": 0.2668409049510956, + "step": 2891 + }, + { + "epoch": 1.4290127270480664, + "grad_norm": 1.237397304360782, + "learning_rate": 4.145306997300543e-06, + "loss": 0.24507637321949005, + "step": 2892 + }, + { + "epoch": 1.4295069813419004, + "grad_norm": 1.366253286129835, + "learning_rate": 4.1386850975982e-06, + "loss": 0.2791709899902344, + "step": 2893 + }, + { + "epoch": 1.4300012356357346, + "grad_norm": 1.2339989570889298, + "learning_rate": 4.132067110825939e-06, + "loss": 0.24982133507728577, + "step": 2894 + }, + { + "epoch": 1.4304954899295688, + "grad_norm": 1.4357848897595227, + "learning_rate": 4.125453041401835e-06, + "loss": 0.2814679741859436, + "step": 2895 + }, + { + "epoch": 1.430989744223403, + "grad_norm": 1.2447298736764703, + "learning_rate": 4.118842893741336e-06, + "loss": 0.22699782252311707, + "step": 2896 + }, + { + "epoch": 1.4314839985172372, + "grad_norm": 1.9366220135779266, + "learning_rate": 4.112236672257294e-06, + "loss": 0.23297230899333954, + "step": 2897 + }, + { + "epoch": 1.4319782528110712, + 
"grad_norm": 1.4169021772429402, + "learning_rate": 4.1056343813599265e-06, + "loss": 0.26085159182548523, + "step": 2898 + }, + { + "epoch": 1.4324725071049054, + "grad_norm": 1.2947699028454482, + "learning_rate": 4.0990360254568216e-06, + "loss": 0.27813559770584106, + "step": 2899 + }, + { + "epoch": 1.4329667613987396, + "grad_norm": 1.4648322974961994, + "learning_rate": 4.092441608952953e-06, + "loss": 0.2821611762046814, + "step": 2900 + }, + { + "epoch": 1.4334610156925738, + "grad_norm": 1.4262304528738896, + "learning_rate": 4.085851136250657e-06, + "loss": 0.25223150849342346, + "step": 2901 + }, + { + "epoch": 1.433955269986408, + "grad_norm": 1.2236760469459784, + "learning_rate": 4.079264611749639e-06, + "loss": 0.225361630320549, + "step": 2902 + }, + { + "epoch": 1.4344495242802422, + "grad_norm": 1.2980114377261416, + "learning_rate": 4.07268203984697e-06, + "loss": 0.2564583420753479, + "step": 2903 + }, + { + "epoch": 1.4349437785740764, + "grad_norm": 1.618238680371033, + "learning_rate": 4.066103424937083e-06, + "loss": 0.2433827817440033, + "step": 2904 + }, + { + "epoch": 1.4354380328679106, + "grad_norm": 1.326779755851318, + "learning_rate": 4.059528771411758e-06, + "loss": 0.26073208451271057, + "step": 2905 + }, + { + "epoch": 1.4359322871617448, + "grad_norm": 1.381783420476221, + "learning_rate": 4.052958083660153e-06, + "loss": 0.2937609553337097, + "step": 2906 + }, + { + "epoch": 1.436426541455579, + "grad_norm": 1.2248682484343931, + "learning_rate": 4.046391366068756e-06, + "loss": 0.22026552259922028, + "step": 2907 + }, + { + "epoch": 1.436920795749413, + "grad_norm": 1.2471555303405935, + "learning_rate": 4.039828623021415e-06, + "loss": 0.21137471497058868, + "step": 2908 + }, + { + "epoch": 1.4374150500432472, + "grad_norm": 1.316365476590171, + "learning_rate": 4.033269858899324e-06, + "loss": 0.23597699403762817, + "step": 2909 + }, + { + "epoch": 1.4379093043370814, + "grad_norm": 1.3166979356724768, + "learning_rate": 
4.026715078081023e-06, + "loss": 0.2667025923728943, + "step": 2910 + }, + { + "epoch": 1.4384035586309156, + "grad_norm": 1.2942746954451143, + "learning_rate": 4.020164284942387e-06, + "loss": 0.2789616584777832, + "step": 2911 + }, + { + "epoch": 1.4388978129247498, + "grad_norm": 1.2105601579452838, + "learning_rate": 4.013617483856637e-06, + "loss": 0.23176617920398712, + "step": 2912 + }, + { + "epoch": 1.439392067218584, + "grad_norm": 1.3989428986083243, + "learning_rate": 4.007074679194313e-06, + "loss": 0.2814248204231262, + "step": 2913 + }, + { + "epoch": 1.439886321512418, + "grad_norm": 1.7399518805726892, + "learning_rate": 4.000535875323307e-06, + "loss": 0.26201730966567993, + "step": 2914 + }, + { + "epoch": 1.4403805758062522, + "grad_norm": 1.3752450122135709, + "learning_rate": 3.994001076608833e-06, + "loss": 0.22517681121826172, + "step": 2915 + }, + { + "epoch": 1.4408748301000864, + "grad_norm": 1.2576751634156127, + "learning_rate": 3.9874702874134205e-06, + "loss": 0.25220564007759094, + "step": 2916 + }, + { + "epoch": 1.4413690843939206, + "grad_norm": 1.3128506030513347, + "learning_rate": 3.980943512096934e-06, + "loss": 0.23441332578659058, + "step": 2917 + }, + { + "epoch": 1.4418633386877548, + "grad_norm": 1.1616125895518352, + "learning_rate": 3.9744207550165625e-06, + "loss": 0.21659764647483826, + "step": 2918 + }, + { + "epoch": 1.442357592981589, + "grad_norm": 1.3726974417027011, + "learning_rate": 3.967902020526797e-06, + "loss": 0.21888667345046997, + "step": 2919 + }, + { + "epoch": 1.4428518472754233, + "grad_norm": 2.445936326011648, + "learning_rate": 3.961387312979454e-06, + "loss": 0.2771157920360565, + "step": 2920 + }, + { + "epoch": 1.4433461015692575, + "grad_norm": 1.312047281106489, + "learning_rate": 3.9548766367236605e-06, + "loss": 0.21376901865005493, + "step": 2921 + }, + { + "epoch": 1.4438403558630917, + "grad_norm": 1.4472763394283668, + "learning_rate": 3.948369996105849e-06, + "loss": 
0.2888128161430359, + "step": 2922 + }, + { + "epoch": 1.4443346101569257, + "grad_norm": 1.327788891714265, + "learning_rate": 3.941867395469761e-06, + "loss": 0.27809786796569824, + "step": 2923 + }, + { + "epoch": 1.4448288644507599, + "grad_norm": 1.377899507369851, + "learning_rate": 3.935368839156443e-06, + "loss": 0.2573625445365906, + "step": 2924 + }, + { + "epoch": 1.445323118744594, + "grad_norm": 1.5375959387987326, + "learning_rate": 3.928874331504232e-06, + "loss": 0.21472841501235962, + "step": 2925 + }, + { + "epoch": 1.4458173730384283, + "grad_norm": 1.2616393731465387, + "learning_rate": 3.922383876848771e-06, + "loss": 0.23214091360569, + "step": 2926 + }, + { + "epoch": 1.4463116273322625, + "grad_norm": 1.2717196020996628, + "learning_rate": 3.915897479522995e-06, + "loss": 0.23830139636993408, + "step": 2927 + }, + { + "epoch": 1.4468058816260967, + "grad_norm": 1.306053937449173, + "learning_rate": 3.909415143857132e-06, + "loss": 0.2519805431365967, + "step": 2928 + }, + { + "epoch": 1.4473001359199307, + "grad_norm": 1.3548983452054761, + "learning_rate": 3.9029368741786935e-06, + "loss": 0.2191445231437683, + "step": 2929 + }, + { + "epoch": 1.4477943902137649, + "grad_norm": 1.2448486288410623, + "learning_rate": 3.896462674812482e-06, + "loss": 0.2267228364944458, + "step": 2930 + }, + { + "epoch": 1.448288644507599, + "grad_norm": 1.3302096442776044, + "learning_rate": 3.88999255008058e-06, + "loss": 0.26456522941589355, + "step": 2931 + }, + { + "epoch": 1.4487828988014333, + "grad_norm": 1.3729869343228434, + "learning_rate": 3.883526504302353e-06, + "loss": 0.25602713227272034, + "step": 2932 + }, + { + "epoch": 1.4492771530952675, + "grad_norm": 1.9847312680384686, + "learning_rate": 3.877064541794435e-06, + "loss": 0.2545332610607147, + "step": 2933 + }, + { + "epoch": 1.4497714073891017, + "grad_norm": 1.3785644388388194, + "learning_rate": 3.87060666687074e-06, + "loss": 0.2846388816833496, + "step": 2934 + }, + { + "epoch": 
1.450265661682936, + "grad_norm": 1.4353094721790403, + "learning_rate": 3.864152883842461e-06, + "loss": 0.2686496376991272, + "step": 2935 + }, + { + "epoch": 1.4507599159767701, + "grad_norm": 1.2943779410551872, + "learning_rate": 3.857703197018044e-06, + "loss": 0.2712322473526001, + "step": 2936 + }, + { + "epoch": 1.4512541702706043, + "grad_norm": 1.3542096863749147, + "learning_rate": 3.851257610703209e-06, + "loss": 0.23492589592933655, + "step": 2937 + }, + { + "epoch": 1.4517484245644385, + "grad_norm": 1.2747230322582852, + "learning_rate": 3.84481612920094e-06, + "loss": 0.274332731962204, + "step": 2938 + }, + { + "epoch": 1.4522426788582725, + "grad_norm": 1.4107112786506069, + "learning_rate": 3.838378756811475e-06, + "loss": 0.250995010137558, + "step": 2939 + }, + { + "epoch": 1.4527369331521067, + "grad_norm": 1.3749429977256393, + "learning_rate": 3.831945497832313e-06, + "loss": 0.25221261382102966, + "step": 2940 + }, + { + "epoch": 1.453231187445941, + "grad_norm": 1.4826415922959744, + "learning_rate": 3.825516356558211e-06, + "loss": 0.2549906075000763, + "step": 2941 + }, + { + "epoch": 1.4537254417397751, + "grad_norm": 1.296751596925164, + "learning_rate": 3.819091337281158e-06, + "loss": 0.2369248867034912, + "step": 2942 + }, + { + "epoch": 1.4542196960336093, + "grad_norm": 1.3057816538242708, + "learning_rate": 3.8126704442904182e-06, + "loss": 0.23681433498859406, + "step": 2943 + }, + { + "epoch": 1.4547139503274436, + "grad_norm": 1.237019268284654, + "learning_rate": 3.806253681872486e-06, + "loss": 0.24966523051261902, + "step": 2944 + }, + { + "epoch": 1.4552082046212775, + "grad_norm": 1.4768369352256168, + "learning_rate": 3.7998410543110954e-06, + "loss": 0.28130626678466797, + "step": 2945 + }, + { + "epoch": 1.4557024589151117, + "grad_norm": 1.3443210173277784, + "learning_rate": 3.7934325658872275e-06, + "loss": 0.2725732922554016, + "step": 2946 + }, + { + "epoch": 1.456196713208946, + "grad_norm": 1.3345618379823432, 
+ "learning_rate": 3.7870282208790976e-06, + "loss": 0.23695361614227295, + "step": 2947 + }, + { + "epoch": 1.4566909675027802, + "grad_norm": 1.3094683367768178, + "learning_rate": 3.780628023562154e-06, + "loss": 0.2556610405445099, + "step": 2948 + }, + { + "epoch": 1.4571852217966144, + "grad_norm": 1.29841880424943, + "learning_rate": 3.7742319782090786e-06, + "loss": 0.26012274622917175, + "step": 2949 + }, + { + "epoch": 1.4576794760904486, + "grad_norm": 1.4612114957138427, + "learning_rate": 3.7678400890897827e-06, + "loss": 0.23788896203041077, + "step": 2950 + }, + { + "epoch": 1.4581737303842828, + "grad_norm": 1.4390155766896275, + "learning_rate": 3.7614523604713894e-06, + "loss": 0.2927572727203369, + "step": 2951 + }, + { + "epoch": 1.458667984678117, + "grad_norm": 1.2435143086118214, + "learning_rate": 3.75506879661827e-06, + "loss": 0.2254970222711563, + "step": 2952 + }, + { + "epoch": 1.4591622389719512, + "grad_norm": 1.2816222898303182, + "learning_rate": 3.7486894017919883e-06, + "loss": 0.216854065656662, + "step": 2953 + }, + { + "epoch": 1.4596564932657852, + "grad_norm": 1.1833481657982283, + "learning_rate": 3.7423141802513417e-06, + "loss": 0.2505137026309967, + "step": 2954 + }, + { + "epoch": 1.4601507475596194, + "grad_norm": 1.2187582021965486, + "learning_rate": 3.735943136252337e-06, + "loss": 0.19780108332633972, + "step": 2955 + }, + { + "epoch": 1.4606450018534536, + "grad_norm": 1.482633837182769, + "learning_rate": 3.7295762740481923e-06, + "loss": 0.26869216561317444, + "step": 2956 + }, + { + "epoch": 1.4611392561472878, + "grad_norm": 1.4121232274028632, + "learning_rate": 3.7232135978893336e-06, + "loss": 0.28265517950057983, + "step": 2957 + }, + { + "epoch": 1.461633510441122, + "grad_norm": 1.268342410891318, + "learning_rate": 3.7168551120233965e-06, + "loss": 0.2381918877363205, + "step": 2958 + }, + { + "epoch": 1.4621277647349562, + "grad_norm": 1.3343795310746396, + "learning_rate": 3.710500820695203e-06, + 
"loss": 0.27194735407829285, + "step": 2959 + }, + { + "epoch": 1.4626220190287902, + "grad_norm": 1.419071318428777, + "learning_rate": 3.7041507281468e-06, + "loss": 0.2611599266529083, + "step": 2960 + }, + { + "epoch": 1.4631162733226244, + "grad_norm": 1.3417831313824735, + "learning_rate": 3.697804838617418e-06, + "loss": 0.2970972955226898, + "step": 2961 + }, + { + "epoch": 1.4636105276164586, + "grad_norm": 1.3986503652920064, + "learning_rate": 3.6914631563434743e-06, + "loss": 0.24313557147979736, + "step": 2962 + }, + { + "epoch": 1.4641047819102928, + "grad_norm": 1.21693161859368, + "learning_rate": 3.685125685558587e-06, + "loss": 0.23243792355060577, + "step": 2963 + }, + { + "epoch": 1.464599036204127, + "grad_norm": 1.384655578733909, + "learning_rate": 3.6787924304935696e-06, + "loss": 0.2850711941719055, + "step": 2964 + }, + { + "epoch": 1.4650932904979612, + "grad_norm": 1.2938153090671698, + "learning_rate": 3.6724633953764023e-06, + "loss": 0.26217392086982727, + "step": 2965 + }, + { + "epoch": 1.4655875447917954, + "grad_norm": 1.3004956100522334, + "learning_rate": 3.666138584432264e-06, + "loss": 0.24623268842697144, + "step": 2966 + }, + { + "epoch": 1.4660817990856296, + "grad_norm": 1.2765502382143128, + "learning_rate": 3.6598180018835063e-06, + "loss": 0.25010040402412415, + "step": 2967 + }, + { + "epoch": 1.4665760533794638, + "grad_norm": 1.2806642930208934, + "learning_rate": 3.6535016519496603e-06, + "loss": 0.24471378326416016, + "step": 2968 + }, + { + "epoch": 1.467070307673298, + "grad_norm": 1.4411992818002375, + "learning_rate": 3.6471895388474323e-06, + "loss": 0.2845621109008789, + "step": 2969 + }, + { + "epoch": 1.467564561967132, + "grad_norm": 1.394997312403621, + "learning_rate": 3.640881666790699e-06, + "loss": 0.26768919825553894, + "step": 2970 + }, + { + "epoch": 1.4680588162609662, + "grad_norm": 1.3707198305280583, + "learning_rate": 3.6345780399904983e-06, + "loss": 0.27386170625686646, + "step": 2971 + }, + 
{ + "epoch": 1.4685530705548004, + "grad_norm": 1.2413908046529407, + "learning_rate": 3.628278662655055e-06, + "loss": 0.259655237197876, + "step": 2972 + }, + { + "epoch": 1.4690473248486347, + "grad_norm": 1.2328404027424946, + "learning_rate": 3.6219835389897305e-06, + "loss": 0.2234620749950409, + "step": 2973 + }, + { + "epoch": 1.4695415791424689, + "grad_norm": 1.2170225214049992, + "learning_rate": 3.6156926731970664e-06, + "loss": 0.25133174657821655, + "step": 2974 + }, + { + "epoch": 1.4700358334363028, + "grad_norm": 1.4753631122763826, + "learning_rate": 3.609406069476752e-06, + "loss": 0.2856005132198334, + "step": 2975 + }, + { + "epoch": 1.470530087730137, + "grad_norm": 1.352763052735898, + "learning_rate": 3.603123732025635e-06, + "loss": 0.23760217428207397, + "step": 2976 + }, + { + "epoch": 1.4710243420239713, + "grad_norm": 1.315945468844056, + "learning_rate": 3.596845665037715e-06, + "loss": 0.2344968169927597, + "step": 2977 + }, + { + "epoch": 1.4715185963178055, + "grad_norm": 1.3513242562279373, + "learning_rate": 3.5905718727041415e-06, + "loss": 0.23936885595321655, + "step": 2978 + }, + { + "epoch": 1.4720128506116397, + "grad_norm": 1.2281537442777626, + "learning_rate": 3.584302359213204e-06, + "loss": 0.24542436003684998, + "step": 2979 + }, + { + "epoch": 1.4725071049054739, + "grad_norm": 1.2816242991916544, + "learning_rate": 3.578037128750338e-06, + "loss": 0.24754226207733154, + "step": 2980 + }, + { + "epoch": 1.473001359199308, + "grad_norm": 1.3406109779820896, + "learning_rate": 3.5717761854981335e-06, + "loss": 0.25167495012283325, + "step": 2981 + }, + { + "epoch": 1.4734956134931423, + "grad_norm": 1.2820406301810907, + "learning_rate": 3.565519533636296e-06, + "loss": 0.21352116763591766, + "step": 2982 + }, + { + "epoch": 1.4739898677869765, + "grad_norm": 1.5800404779419173, + "learning_rate": 3.5592671773416798e-06, + "loss": 0.24721838533878326, + "step": 2983 + }, + { + "epoch": 1.4744841220808107, + "grad_norm": 
1.209332122723965, + "learning_rate": 3.5530191207882705e-06, + "loss": 0.2098400741815567, + "step": 2984 + }, + { + "epoch": 1.4749783763746447, + "grad_norm": 1.4059961620340085, + "learning_rate": 3.5467753681471784e-06, + "loss": 0.27138370275497437, + "step": 2985 + }, + { + "epoch": 1.475472630668479, + "grad_norm": 1.456553871591733, + "learning_rate": 3.5405359235866468e-06, + "loss": 0.2675255537033081, + "step": 2986 + }, + { + "epoch": 1.475966884962313, + "grad_norm": 1.3852192514849078, + "learning_rate": 3.5343007912720397e-06, + "loss": 0.2927984893321991, + "step": 2987 + }, + { + "epoch": 1.4764611392561473, + "grad_norm": 1.4840757807353469, + "learning_rate": 3.5280699753658354e-06, + "loss": 0.2897256910800934, + "step": 2988 + }, + { + "epoch": 1.4769553935499815, + "grad_norm": 1.3162511876956198, + "learning_rate": 3.521843480027646e-06, + "loss": 0.25903570652008057, + "step": 2989 + }, + { + "epoch": 1.4774496478438157, + "grad_norm": 1.1815962199969574, + "learning_rate": 3.515621309414191e-06, + "loss": 0.2097684144973755, + "step": 2990 + }, + { + "epoch": 1.4779439021376497, + "grad_norm": 1.368257943211956, + "learning_rate": 3.5094034676792952e-06, + "loss": 0.25807827711105347, + "step": 2991 + }, + { + "epoch": 1.478438156431484, + "grad_norm": 1.3326288392160186, + "learning_rate": 3.503189958973906e-06, + "loss": 0.24161803722381592, + "step": 2992 + }, + { + "epoch": 1.4789324107253181, + "grad_norm": 1.3735233821721475, + "learning_rate": 3.4969807874460717e-06, + "loss": 0.2612338364124298, + "step": 2993 + }, + { + "epoch": 1.4794266650191523, + "grad_norm": 1.3484776453875857, + "learning_rate": 3.490775957240947e-06, + "loss": 0.2529192566871643, + "step": 2994 + }, + { + "epoch": 1.4799209193129865, + "grad_norm": 1.376626480795096, + "learning_rate": 3.4845754725007883e-06, + "loss": 0.2616920471191406, + "step": 2995 + }, + { + "epoch": 1.4804151736068207, + "grad_norm": 1.1709509708234012, + "learning_rate": 
3.4783793373649534e-06, + "loss": 0.2372770607471466, + "step": 2996 + }, + { + "epoch": 1.480909427900655, + "grad_norm": 1.6683733615888718, + "learning_rate": 3.4721875559698826e-06, + "loss": 0.2993369996547699, + "step": 2997 + }, + { + "epoch": 1.4814036821944891, + "grad_norm": 1.444631738912031, + "learning_rate": 3.4660001324491354e-06, + "loss": 0.2703147530555725, + "step": 2998 + }, + { + "epoch": 1.4818979364883234, + "grad_norm": 1.497851135078702, + "learning_rate": 3.459817070933337e-06, + "loss": 0.2909662425518036, + "step": 2999 + }, + { + "epoch": 1.4823921907821573, + "grad_norm": 1.4957339087199897, + "learning_rate": 3.4536383755502146e-06, + "loss": 0.2620519697666168, + "step": 3000 + }, + { + "epoch": 1.4828864450759915, + "grad_norm": 1.4607702963487426, + "learning_rate": 3.447464050424576e-06, + "loss": 0.2740327715873718, + "step": 3001 + }, + { + "epoch": 1.4833806993698258, + "grad_norm": 1.4051737005514326, + "learning_rate": 3.441294099678314e-06, + "loss": 0.2597920000553131, + "step": 3002 + }, + { + "epoch": 1.48387495366366, + "grad_norm": 1.2931150222772085, + "learning_rate": 3.435128527430397e-06, + "loss": 0.23138844966888428, + "step": 3003 + }, + { + "epoch": 1.4843692079574942, + "grad_norm": 1.4678522965018421, + "learning_rate": 3.428967337796879e-06, + "loss": 0.26457998156547546, + "step": 3004 + }, + { + "epoch": 1.4848634622513284, + "grad_norm": 1.3435199008351797, + "learning_rate": 3.4228105348908703e-06, + "loss": 0.22283414006233215, + "step": 3005 + }, + { + "epoch": 1.4853577165451624, + "grad_norm": 1.404722725472706, + "learning_rate": 3.416658122822576e-06, + "loss": 0.26169392466545105, + "step": 3006 + }, + { + "epoch": 1.4858519708389966, + "grad_norm": 1.3942121909077798, + "learning_rate": 3.4105101056992574e-06, + "loss": 0.22738765180110931, + "step": 3007 + }, + { + "epoch": 1.4863462251328308, + "grad_norm": 1.640113120385147, + "learning_rate": 3.404366487625237e-06, + "loss": 
0.24252702295780182, + "step": 3008 + }, + { + "epoch": 1.486840479426665, + "grad_norm": 1.2658350422978366, + "learning_rate": 3.398227272701905e-06, + "loss": 0.2192659229040146, + "step": 3009 + }, + { + "epoch": 1.4873347337204992, + "grad_norm": 1.3659525117305242, + "learning_rate": 3.3920924650277253e-06, + "loss": 0.23824100196361542, + "step": 3010 + }, + { + "epoch": 1.4878289880143334, + "grad_norm": 1.304246601014088, + "learning_rate": 3.3859620686981977e-06, + "loss": 0.25558948516845703, + "step": 3011 + }, + { + "epoch": 1.4883232423081676, + "grad_norm": 1.2977660969069507, + "learning_rate": 3.3798360878058887e-06, + "loss": 0.23521414399147034, + "step": 3012 + }, + { + "epoch": 1.4888174966020018, + "grad_norm": 1.5059732923775448, + "learning_rate": 3.373714526440417e-06, + "loss": 0.26024043560028076, + "step": 3013 + }, + { + "epoch": 1.489311750895836, + "grad_norm": 1.3966534942487767, + "learning_rate": 3.3675973886884506e-06, + "loss": 0.2676945626735687, + "step": 3014 + }, + { + "epoch": 1.4898060051896702, + "grad_norm": 1.4302757106543351, + "learning_rate": 3.361484678633701e-06, + "loss": 0.29499778151512146, + "step": 3015 + }, + { + "epoch": 1.4903002594835042, + "grad_norm": 1.2541194356509255, + "learning_rate": 3.35537640035693e-06, + "loss": 0.21667227149009705, + "step": 3016 + }, + { + "epoch": 1.4907945137773384, + "grad_norm": 1.5055716214820787, + "learning_rate": 3.3492725579359288e-06, + "loss": 0.2852727770805359, + "step": 3017 + }, + { + "epoch": 1.4912887680711726, + "grad_norm": 1.3110566349547437, + "learning_rate": 3.343173155445546e-06, + "loss": 0.22535362839698792, + "step": 3018 + }, + { + "epoch": 1.4917830223650068, + "grad_norm": 1.3390943365322368, + "learning_rate": 3.3370781969576473e-06, + "loss": 0.23513402044773102, + "step": 3019 + }, + { + "epoch": 1.492277276658841, + "grad_norm": 1.34171251218287, + "learning_rate": 3.3309876865411426e-06, + "loss": 0.2343328893184662, + "step": 3020 + }, + { + 
"epoch": 1.4927715309526752, + "grad_norm": 1.4982279835949508, + "learning_rate": 3.3249016282619696e-06, + "loss": 0.309964656829834, + "step": 3021 + }, + { + "epoch": 1.4932657852465092, + "grad_norm": 1.4104830526650916, + "learning_rate": 3.318820026183095e-06, + "loss": 0.2678214907646179, + "step": 3022 + }, + { + "epoch": 1.4937600395403434, + "grad_norm": 1.3871314289257326, + "learning_rate": 3.312742884364508e-06, + "loss": 0.24117907881736755, + "step": 3023 + }, + { + "epoch": 1.4942542938341776, + "grad_norm": 1.4966526123322192, + "learning_rate": 3.306670206863225e-06, + "loss": 0.23572009801864624, + "step": 3024 + }, + { + "epoch": 1.4947485481280118, + "grad_norm": 1.1974970903692888, + "learning_rate": 3.3006019977332728e-06, + "loss": 0.20058652758598328, + "step": 3025 + }, + { + "epoch": 1.495242802421846, + "grad_norm": 1.4552709446661256, + "learning_rate": 3.2945382610257017e-06, + "loss": 0.2433123141527176, + "step": 3026 + }, + { + "epoch": 1.4957370567156802, + "grad_norm": 1.330592869585441, + "learning_rate": 3.2884790007885834e-06, + "loss": 0.2648032009601593, + "step": 3027 + }, + { + "epoch": 1.4962313110095145, + "grad_norm": 1.4274009022113794, + "learning_rate": 3.2824242210669853e-06, + "loss": 0.23508986830711365, + "step": 3028 + }, + { + "epoch": 1.4967255653033487, + "grad_norm": 1.337116326245031, + "learning_rate": 3.2763739259029946e-06, + "loss": 0.2340327799320221, + "step": 3029 + }, + { + "epoch": 1.4972198195971829, + "grad_norm": 1.4724312525996526, + "learning_rate": 3.2703281193357028e-06, + "loss": 0.24071671068668365, + "step": 3030 + }, + { + "epoch": 1.4977140738910169, + "grad_norm": 1.4191732736253682, + "learning_rate": 3.264286805401203e-06, + "loss": 0.26332271099090576, + "step": 3031 + }, + { + "epoch": 1.498208328184851, + "grad_norm": 1.266600605298302, + "learning_rate": 3.2582499881325904e-06, + "loss": 0.21818014979362488, + "step": 3032 + }, + { + "epoch": 1.4987025824786853, + "grad_norm": 
1.3340246980776698, + "learning_rate": 3.2522176715599606e-06, + "loss": 0.26997917890548706, + "step": 3033 + }, + { + "epoch": 1.4991968367725195, + "grad_norm": 1.4818331950802985, + "learning_rate": 3.2461898597103935e-06, + "loss": 0.21703608334064484, + "step": 3034 + }, + { + "epoch": 1.4996910910663537, + "grad_norm": 1.287764216628678, + "learning_rate": 3.240166556607979e-06, + "loss": 0.24345526099205017, + "step": 3035 + }, + { + "epoch": 1.5001853453601877, + "grad_norm": 1.2134455175661707, + "learning_rate": 3.2341477662737877e-06, + "loss": 0.2428402602672577, + "step": 3036 + }, + { + "epoch": 1.5006795996540219, + "grad_norm": 1.389226279044202, + "learning_rate": 3.228133492725872e-06, + "loss": 0.234619602560997, + "step": 3037 + }, + { + "epoch": 1.501173853947856, + "grad_norm": 1.3308420188359134, + "learning_rate": 3.2221237399792784e-06, + "loss": 0.27995944023132324, + "step": 3038 + }, + { + "epoch": 1.5016681082416903, + "grad_norm": 1.283844133259085, + "learning_rate": 3.2161185120460327e-06, + "loss": 0.23708665370941162, + "step": 3039 + }, + { + "epoch": 1.5021623625355245, + "grad_norm": 1.3268773172813266, + "learning_rate": 3.2101178129351373e-06, + "loss": 0.2541486620903015, + "step": 3040 + }, + { + "epoch": 1.5026566168293587, + "grad_norm": 1.2735534589560005, + "learning_rate": 3.204121646652576e-06, + "loss": 0.2281494140625, + "step": 3041 + }, + { + "epoch": 1.503150871123193, + "grad_norm": 1.4214183804465141, + "learning_rate": 3.1981300172013006e-06, + "loss": 0.24793995916843414, + "step": 3042 + }, + { + "epoch": 1.503645125417027, + "grad_norm": 1.3820844339773122, + "learning_rate": 3.19214292858124e-06, + "loss": 0.25877612829208374, + "step": 3043 + }, + { + "epoch": 1.5041393797108613, + "grad_norm": 1.2606638362034603, + "learning_rate": 3.1861603847892907e-06, + "loss": 0.23822908103466034, + "step": 3044 + }, + { + "epoch": 1.5046336340046955, + "grad_norm": 1.3375723790086107, + "learning_rate": 
3.1801823898193075e-06, + "loss": 0.2450297623872757, + "step": 3045 + }, + { + "epoch": 1.5051278882985297, + "grad_norm": 1.291286771303469, + "learning_rate": 3.1742089476621176e-06, + "loss": 0.23657044768333435, + "step": 3046 + }, + { + "epoch": 1.505622142592364, + "grad_norm": 1.330327819651038, + "learning_rate": 3.1682400623055043e-06, + "loss": 0.22040539979934692, + "step": 3047 + }, + { + "epoch": 1.506116396886198, + "grad_norm": 1.2295078748580162, + "learning_rate": 3.162275737734213e-06, + "loss": 0.24671347439289093, + "step": 3048 + }, + { + "epoch": 1.5066106511800321, + "grad_norm": 1.3193055288047242, + "learning_rate": 3.156315977929939e-06, + "loss": 0.2590971291065216, + "step": 3049 + }, + { + "epoch": 1.5071049054738663, + "grad_norm": 1.3201796395435559, + "learning_rate": 3.1503607868713383e-06, + "loss": 0.2650923430919647, + "step": 3050 + }, + { + "epoch": 1.5075991597677005, + "grad_norm": 1.3124240495866886, + "learning_rate": 3.1444101685339987e-06, + "loss": 0.22146420180797577, + "step": 3051 + }, + { + "epoch": 1.5080934140615345, + "grad_norm": 1.3875424644692997, + "learning_rate": 3.1384641268904804e-06, + "loss": 0.26743125915527344, + "step": 3052 + }, + { + "epoch": 1.5085876683553687, + "grad_norm": 1.4406215302595167, + "learning_rate": 3.1325226659102746e-06, + "loss": 0.24730908870697021, + "step": 3053 + }, + { + "epoch": 1.509081922649203, + "grad_norm": 1.3933207280707873, + "learning_rate": 3.1265857895598094e-06, + "loss": 0.26301079988479614, + "step": 3054 + }, + { + "epoch": 1.5095761769430371, + "grad_norm": 1.2589035946994764, + "learning_rate": 3.1206535018024598e-06, + "loss": 0.22815877199172974, + "step": 3055 + }, + { + "epoch": 1.5100704312368713, + "grad_norm": 1.533757049437193, + "learning_rate": 3.114725806598544e-06, + "loss": 0.25178754329681396, + "step": 3056 + }, + { + "epoch": 1.5105646855307056, + "grad_norm": 1.3661154596053653, + "learning_rate": 3.1088027079052973e-06, + "loss": 
0.20269548892974854, + "step": 3057 + }, + { + "epoch": 1.5110589398245398, + "grad_norm": 1.4014331356202114, + "learning_rate": 3.1028842096769006e-06, + "loss": 0.25972461700439453, + "step": 3058 + }, + { + "epoch": 1.511553194118374, + "grad_norm": 1.3745096869790834, + "learning_rate": 3.0969703158644583e-06, + "loss": 0.23313641548156738, + "step": 3059 + }, + { + "epoch": 1.5120474484122082, + "grad_norm": 1.2941298023610517, + "learning_rate": 3.0910610304159993e-06, + "loss": 0.2359476238489151, + "step": 3060 + }, + { + "epoch": 1.5125417027060424, + "grad_norm": 1.3631605592123968, + "learning_rate": 3.085156357276481e-06, + "loss": 0.263039767742157, + "step": 3061 + }, + { + "epoch": 1.5130359569998766, + "grad_norm": 1.4414947958352682, + "learning_rate": 3.0792563003877795e-06, + "loss": 0.2222701609134674, + "step": 3062 + }, + { + "epoch": 1.5135302112937106, + "grad_norm": 1.5152386602086467, + "learning_rate": 3.0733608636886815e-06, + "loss": 0.2511240839958191, + "step": 3063 + }, + { + "epoch": 1.5140244655875448, + "grad_norm": 1.3426863589238012, + "learning_rate": 3.0674700511149057e-06, + "loss": 0.26376873254776, + "step": 3064 + }, + { + "epoch": 1.514518719881379, + "grad_norm": 1.50705834278763, + "learning_rate": 3.0615838665990685e-06, + "loss": 0.2883176803588867, + "step": 3065 + }, + { + "epoch": 1.5150129741752132, + "grad_norm": 1.4534493774446482, + "learning_rate": 3.055702314070703e-06, + "loss": 0.2641439437866211, + "step": 3066 + }, + { + "epoch": 1.5155072284690472, + "grad_norm": 1.2206107550113217, + "learning_rate": 3.049825397456252e-06, + "loss": 0.22250229120254517, + "step": 3067 + }, + { + "epoch": 1.5160014827628814, + "grad_norm": 1.6917159383624243, + "learning_rate": 3.0439531206790585e-06, + "loss": 0.291684091091156, + "step": 3068 + }, + { + "epoch": 1.5164957370567156, + "grad_norm": 1.2582948861406589, + "learning_rate": 3.0380854876593725e-06, + "loss": 0.22581104934215546, + "step": 3069 + }, + { + 
"epoch": 1.5169899913505498, + "grad_norm": 1.3218689478609282, + "learning_rate": 3.032222502314345e-06, + "loss": 0.22701920568943024, + "step": 3070 + }, + { + "epoch": 1.517484245644384, + "grad_norm": 1.4011754473371674, + "learning_rate": 3.0263641685580134e-06, + "loss": 0.27151840925216675, + "step": 3071 + }, + { + "epoch": 1.5179784999382182, + "grad_norm": 1.4319870241234463, + "learning_rate": 3.0205104903013183e-06, + "loss": 0.25780510902404785, + "step": 3072 + }, + { + "epoch": 1.5184727542320524, + "grad_norm": 1.232949136662072, + "learning_rate": 3.014661471452103e-06, + "loss": 0.23905009031295776, + "step": 3073 + }, + { + "epoch": 1.5189670085258866, + "grad_norm": 1.296685135563547, + "learning_rate": 3.0088171159150758e-06, + "loss": 0.25984710454940796, + "step": 3074 + }, + { + "epoch": 1.5194612628197208, + "grad_norm": 1.5925440917505933, + "learning_rate": 3.0029774275918523e-06, + "loss": 0.24934321641921997, + "step": 3075 + }, + { + "epoch": 1.519955517113555, + "grad_norm": 1.3570253725800296, + "learning_rate": 2.997142410380921e-06, + "loss": 0.24181538820266724, + "step": 3076 + }, + { + "epoch": 1.5204497714073892, + "grad_norm": 1.4224922399256614, + "learning_rate": 2.9913120681776586e-06, + "loss": 0.28867265582084656, + "step": 3077 + }, + { + "epoch": 1.5209440257012234, + "grad_norm": 1.3689537883355085, + "learning_rate": 2.9854864048743183e-06, + "loss": 0.25082239508628845, + "step": 3078 + }, + { + "epoch": 1.5214382799950574, + "grad_norm": 1.1809552467181543, + "learning_rate": 2.979665424360031e-06, + "loss": 0.21152186393737793, + "step": 3079 + }, + { + "epoch": 1.5219325342888916, + "grad_norm": 1.3255328033562375, + "learning_rate": 2.9738491305207926e-06, + "loss": 0.22989922761917114, + "step": 3080 + }, + { + "epoch": 1.5224267885827258, + "grad_norm": 1.4352789035320561, + "learning_rate": 2.9680375272394855e-06, + "loss": 0.21606113016605377, + "step": 3081 + }, + { + "epoch": 1.5229210428765598, + 
"grad_norm": 1.2795767684328416, + "learning_rate": 2.962230618395855e-06, + "loss": 0.25060969591140747, + "step": 3082 + }, + { + "epoch": 1.523415297170394, + "grad_norm": 1.4409246111783223, + "learning_rate": 2.9564284078665016e-06, + "loss": 0.2574993371963501, + "step": 3083 + }, + { + "epoch": 1.5239095514642282, + "grad_norm": 1.3476850353049301, + "learning_rate": 2.9506308995249035e-06, + "loss": 0.2552590072154999, + "step": 3084 + }, + { + "epoch": 1.5244038057580624, + "grad_norm": 1.4294064187721107, + "learning_rate": 2.9448380972413936e-06, + "loss": 0.2356393188238144, + "step": 3085 + }, + { + "epoch": 1.5248980600518967, + "grad_norm": 1.2956637091449177, + "learning_rate": 2.939050004883164e-06, + "loss": 0.25111299753189087, + "step": 3086 + }, + { + "epoch": 1.5253923143457309, + "grad_norm": 1.6187968050107684, + "learning_rate": 2.933266626314263e-06, + "loss": 0.2713226079940796, + "step": 3087 + }, + { + "epoch": 1.525886568639565, + "grad_norm": 1.371480760416421, + "learning_rate": 2.92748796539559e-06, + "loss": 0.2493591606616974, + "step": 3088 + }, + { + "epoch": 1.5263808229333993, + "grad_norm": 1.3919253891743593, + "learning_rate": 2.9217140259848984e-06, + "loss": 0.2377934455871582, + "step": 3089 + }, + { + "epoch": 1.5268750772272335, + "grad_norm": 1.222188939870737, + "learning_rate": 2.9159448119367896e-06, + "loss": 0.23113523423671722, + "step": 3090 + }, + { + "epoch": 1.5273693315210677, + "grad_norm": 1.3071786210451368, + "learning_rate": 2.910180327102702e-06, + "loss": 0.2212657630443573, + "step": 3091 + }, + { + "epoch": 1.527863585814902, + "grad_norm": 1.4809706556535216, + "learning_rate": 2.904420575330923e-06, + "loss": 0.3317147195339203, + "step": 3092 + }, + { + "epoch": 1.528357840108736, + "grad_norm": 1.222501836116789, + "learning_rate": 2.8986655604665914e-06, + "loss": 0.21677865087985992, + "step": 3093 + }, + { + "epoch": 1.52885209440257, + "grad_norm": 1.4687657258901345, + "learning_rate": 
2.892915286351663e-06, + "loss": 0.2719038724899292, + "step": 3094 + }, + { + "epoch": 1.5293463486964043, + "grad_norm": 1.4800981330468082, + "learning_rate": 2.887169756824941e-06, + "loss": 0.2870655953884125, + "step": 3095 + }, + { + "epoch": 1.5298406029902385, + "grad_norm": 1.6050530390151894, + "learning_rate": 2.8814289757220636e-06, + "loss": 0.27370864152908325, + "step": 3096 + }, + { + "epoch": 1.5303348572840727, + "grad_norm": 1.2925821727625635, + "learning_rate": 2.8756929468754834e-06, + "loss": 0.24579623341560364, + "step": 3097 + }, + { + "epoch": 1.5308291115779067, + "grad_norm": 1.5466324939604184, + "learning_rate": 2.869961674114501e-06, + "loss": 0.25092196464538574, + "step": 3098 + }, + { + "epoch": 1.531323365871741, + "grad_norm": 1.539826368870157, + "learning_rate": 2.864235161265232e-06, + "loss": 0.29637211561203003, + "step": 3099 + }, + { + "epoch": 1.531817620165575, + "grad_norm": 1.346232107313421, + "learning_rate": 2.8585134121506086e-06, + "loss": 0.24216854572296143, + "step": 3100 + }, + { + "epoch": 1.5323118744594093, + "grad_norm": 1.264644352464564, + "learning_rate": 2.8527964305903887e-06, + "loss": 0.2050018608570099, + "step": 3101 + }, + { + "epoch": 1.5328061287532435, + "grad_norm": 1.4429594327267479, + "learning_rate": 2.8470842204011562e-06, + "loss": 0.2323600798845291, + "step": 3102 + }, + { + "epoch": 1.5333003830470777, + "grad_norm": 1.3588986581117766, + "learning_rate": 2.8413767853962937e-06, + "loss": 0.2582741379737854, + "step": 3103 + }, + { + "epoch": 1.533794637340912, + "grad_norm": 1.2503142010331656, + "learning_rate": 2.8356741293860034e-06, + "loss": 0.2190069705247879, + "step": 3104 + }, + { + "epoch": 1.5342888916347461, + "grad_norm": 1.2700906528895424, + "learning_rate": 2.8299762561773004e-06, + "loss": 0.2293972671031952, + "step": 3105 + }, + { + "epoch": 1.5347831459285803, + "grad_norm": 1.4604730845156306, + "learning_rate": 2.8242831695740004e-06, + "loss": 
0.28793102502822876, + "step": 3106 + }, + { + "epoch": 1.5352774002224145, + "grad_norm": 1.3871033704581968, + "learning_rate": 2.8185948733767276e-06, + "loss": 0.25700464844703674, + "step": 3107 + }, + { + "epoch": 1.5357716545162488, + "grad_norm": 1.6036334059609652, + "learning_rate": 2.8129113713829115e-06, + "loss": 0.2633448541164398, + "step": 3108 + }, + { + "epoch": 1.5362659088100827, + "grad_norm": 1.2623866770143863, + "learning_rate": 2.8072326673867667e-06, + "loss": 0.2363145351409912, + "step": 3109 + }, + { + "epoch": 1.536760163103917, + "grad_norm": 1.3073287831639788, + "learning_rate": 2.8015587651793273e-06, + "loss": 0.24324053525924683, + "step": 3110 + }, + { + "epoch": 1.5372544173977511, + "grad_norm": 1.445888976457047, + "learning_rate": 2.795889668548399e-06, + "loss": 0.24139198660850525, + "step": 3111 + }, + { + "epoch": 1.5377486716915854, + "grad_norm": 1.3070463104686283, + "learning_rate": 2.790225381278595e-06, + "loss": 0.2502334713935852, + "step": 3112 + }, + { + "epoch": 1.5382429259854193, + "grad_norm": 1.3233606598015195, + "learning_rate": 2.784565907151311e-06, + "loss": 0.24635109305381775, + "step": 3113 + }, + { + "epoch": 1.5387371802792535, + "grad_norm": 1.236974627125298, + "learning_rate": 2.7789112499447312e-06, + "loss": 0.2299586534500122, + "step": 3114 + }, + { + "epoch": 1.5392314345730878, + "grad_norm": 1.232633224868461, + "learning_rate": 2.7732614134338243e-06, + "loss": 0.2296627312898636, + "step": 3115 + }, + { + "epoch": 1.539725688866922, + "grad_norm": 1.3919487561893158, + "learning_rate": 2.767616401390343e-06, + "loss": 0.26127320528030396, + "step": 3116 + }, + { + "epoch": 1.5402199431607562, + "grad_norm": 1.3612758454379796, + "learning_rate": 2.761976217582808e-06, + "loss": 0.24718445539474487, + "step": 3117 + }, + { + "epoch": 1.5407141974545904, + "grad_norm": 1.3000063965271036, + "learning_rate": 2.7563408657765345e-06, + "loss": 0.22314362227916718, + "step": 3118 + }, + { + 
"epoch": 1.5412084517484246, + "grad_norm": 1.2190954536725822, + "learning_rate": 2.750710349733602e-06, + "loss": 0.2288416028022766, + "step": 3119 + }, + { + "epoch": 1.5417027060422588, + "grad_norm": 1.3774388084670495, + "learning_rate": 2.7450846732128577e-06, + "loss": 0.26181158423423767, + "step": 3120 + }, + { + "epoch": 1.542196960336093, + "grad_norm": 1.2123920647911897, + "learning_rate": 2.739463839969926e-06, + "loss": 0.22397834062576294, + "step": 3121 + }, + { + "epoch": 1.5426912146299272, + "grad_norm": 1.4361842348504215, + "learning_rate": 2.7338478537571943e-06, + "loss": 0.23633858561515808, + "step": 3122 + }, + { + "epoch": 1.5431854689237614, + "grad_norm": 1.402092217147563, + "learning_rate": 2.7282367183238143e-06, + "loss": 0.26719149947166443, + "step": 3123 + }, + { + "epoch": 1.5436797232175956, + "grad_norm": 1.5260713360749147, + "learning_rate": 2.722630437415701e-06, + "loss": 0.2882165014743805, + "step": 3124 + }, + { + "epoch": 1.5441739775114296, + "grad_norm": 1.258294682394544, + "learning_rate": 2.7170290147755285e-06, + "loss": 0.2377905696630478, + "step": 3125 + }, + { + "epoch": 1.5446682318052638, + "grad_norm": 1.3195147017546947, + "learning_rate": 2.7114324541427193e-06, + "loss": 0.2705368399620056, + "step": 3126 + }, + { + "epoch": 1.545162486099098, + "grad_norm": 1.2857701503132921, + "learning_rate": 2.7058407592534663e-06, + "loss": 0.246593177318573, + "step": 3127 + }, + { + "epoch": 1.5456567403929322, + "grad_norm": 1.33265619524068, + "learning_rate": 2.700253933840705e-06, + "loss": 0.2339816391468048, + "step": 3128 + }, + { + "epoch": 1.5461509946867662, + "grad_norm": 1.3254997645322988, + "learning_rate": 2.6946719816341127e-06, + "loss": 0.2727898359298706, + "step": 3129 + }, + { + "epoch": 1.5466452489806004, + "grad_norm": 1.483440007746236, + "learning_rate": 2.6890949063601255e-06, + "loss": 0.285343736410141, + "step": 3130 + }, + { + "epoch": 1.5471395032744346, + "grad_norm": 
1.4219498161281177, + "learning_rate": 2.6835227117419184e-06, + "loss": 0.25782397389411926, + "step": 3131 + }, + { + "epoch": 1.5476337575682688, + "grad_norm": 1.4096561970820742, + "learning_rate": 2.67795540149941e-06, + "loss": 0.26677054166793823, + "step": 3132 + }, + { + "epoch": 1.548128011862103, + "grad_norm": 1.375758748898483, + "learning_rate": 2.6723929793492555e-06, + "loss": 0.2696993052959442, + "step": 3133 + }, + { + "epoch": 1.5486222661559372, + "grad_norm": 1.3214248540646165, + "learning_rate": 2.66683544900485e-06, + "loss": 0.2536013424396515, + "step": 3134 + }, + { + "epoch": 1.5491165204497714, + "grad_norm": 1.352660590997614, + "learning_rate": 2.661282814176319e-06, + "loss": 0.2583885192871094, + "step": 3135 + }, + { + "epoch": 1.5496107747436056, + "grad_norm": 1.3555750519784333, + "learning_rate": 2.655735078570528e-06, + "loss": 0.24341340363025665, + "step": 3136 + }, + { + "epoch": 1.5501050290374399, + "grad_norm": 1.3694743065317843, + "learning_rate": 2.650192245891059e-06, + "loss": 0.2575637698173523, + "step": 3137 + }, + { + "epoch": 1.550599283331274, + "grad_norm": 1.3743479794773286, + "learning_rate": 2.644654319838227e-06, + "loss": 0.24109753966331482, + "step": 3138 + }, + { + "epoch": 1.5510935376251083, + "grad_norm": 1.2822421062589742, + "learning_rate": 2.6391213041090822e-06, + "loss": 0.246525377035141, + "step": 3139 + }, + { + "epoch": 1.5515877919189422, + "grad_norm": 1.3144657839500415, + "learning_rate": 2.6335932023973777e-06, + "loss": 0.2589566111564636, + "step": 3140 + }, + { + "epoch": 1.5520820462127765, + "grad_norm": 1.333811387247849, + "learning_rate": 2.628070018393598e-06, + "loss": 0.26198744773864746, + "step": 3141 + }, + { + "epoch": 1.5525763005066107, + "grad_norm": 1.2808916237604833, + "learning_rate": 2.622551755784942e-06, + "loss": 0.22991782426834106, + "step": 3142 + }, + { + "epoch": 1.5530705548004449, + "grad_norm": 1.242582313641482, + "learning_rate": 
2.6170384182553244e-06, + "loss": 0.22211629152297974, + "step": 3143 + }, + { + "epoch": 1.5535648090942789, + "grad_norm": 1.306994517774283, + "learning_rate": 2.6115300094853666e-06, + "loss": 0.2665289640426636, + "step": 3144 + }, + { + "epoch": 1.554059063388113, + "grad_norm": 1.260713008188702, + "learning_rate": 2.6060265331524114e-06, + "loss": 0.20211085677146912, + "step": 3145 + }, + { + "epoch": 1.5545533176819473, + "grad_norm": 1.3930467289400041, + "learning_rate": 2.6005279929304918e-06, + "loss": 0.24264919757843018, + "step": 3146 + }, + { + "epoch": 1.5550475719757815, + "grad_norm": 1.316241217623005, + "learning_rate": 2.595034392490354e-06, + "loss": 0.2722601294517517, + "step": 3147 + }, + { + "epoch": 1.5555418262696157, + "grad_norm": 1.3463437829147908, + "learning_rate": 2.58954573549946e-06, + "loss": 0.26061201095581055, + "step": 3148 + }, + { + "epoch": 1.5560360805634499, + "grad_norm": 1.3701131034296847, + "learning_rate": 2.5840620256219464e-06, + "loss": 0.20620305836200714, + "step": 3149 + }, + { + "epoch": 1.556530334857284, + "grad_norm": 1.3323948648350379, + "learning_rate": 2.578583266518664e-06, + "loss": 0.2424723207950592, + "step": 3150 + }, + { + "epoch": 1.5570245891511183, + "grad_norm": 1.4286998078779003, + "learning_rate": 2.573109461847153e-06, + "loss": 0.248019739985466, + "step": 3151 + }, + { + "epoch": 1.5575188434449525, + "grad_norm": 1.2753051030343154, + "learning_rate": 2.5676406152616483e-06, + "loss": 0.23162522912025452, + "step": 3152 + }, + { + "epoch": 1.5580130977387867, + "grad_norm": 1.6072180292151754, + "learning_rate": 2.562176730413074e-06, + "loss": 0.20099176466464996, + "step": 3153 + }, + { + "epoch": 1.558507352032621, + "grad_norm": 1.4868098360756863, + "learning_rate": 2.5567178109490433e-06, + "loss": 0.27957430481910706, + "step": 3154 + }, + { + "epoch": 1.5590016063264551, + "grad_norm": 1.248830156095604, + "learning_rate": 2.551263860513845e-06, + "loss": 
0.23941464722156525, + "step": 3155 + }, + { + "epoch": 1.559495860620289, + "grad_norm": 1.4371594834198067, + "learning_rate": 2.5458148827484695e-06, + "loss": 0.24910275638103485, + "step": 3156 + }, + { + "epoch": 1.5599901149141233, + "grad_norm": 1.325153365111165, + "learning_rate": 2.540370881290568e-06, + "loss": 0.26430344581604004, + "step": 3157 + }, + { + "epoch": 1.5604843692079575, + "grad_norm": 1.419775898075986, + "learning_rate": 2.534931859774481e-06, + "loss": 0.2833614945411682, + "step": 3158 + }, + { + "epoch": 1.5609786235017915, + "grad_norm": 1.2863995969426358, + "learning_rate": 2.5294978218312215e-06, + "loss": 0.24630708992481232, + "step": 3159 + }, + { + "epoch": 1.5614728777956257, + "grad_norm": 1.398973984381973, + "learning_rate": 2.524068771088476e-06, + "loss": 0.2674857974052429, + "step": 3160 + }, + { + "epoch": 1.56196713208946, + "grad_norm": 1.34356245737179, + "learning_rate": 2.5186447111706005e-06, + "loss": 0.23531441390514374, + "step": 3161 + }, + { + "epoch": 1.5624613863832941, + "grad_norm": 1.2374731185400574, + "learning_rate": 2.5132256456986236e-06, + "loss": 0.2603223919868469, + "step": 3162 + }, + { + "epoch": 1.5629556406771283, + "grad_norm": 1.302457785178724, + "learning_rate": 2.5078115782902267e-06, + "loss": 0.220007985830307, + "step": 3163 + }, + { + "epoch": 1.5634498949709625, + "grad_norm": 1.36046018530454, + "learning_rate": 2.502402512559773e-06, + "loss": 0.22660651803016663, + "step": 3164 + }, + { + "epoch": 1.5639441492647967, + "grad_norm": 1.4627286861974862, + "learning_rate": 2.4969984521182766e-06, + "loss": 0.26425695419311523, + "step": 3165 + }, + { + "epoch": 1.564438403558631, + "grad_norm": 1.3019070428865334, + "learning_rate": 2.4915994005734057e-06, + "loss": 0.22870787978172302, + "step": 3166 + }, + { + "epoch": 1.5649326578524652, + "grad_norm": 1.2622414815912377, + "learning_rate": 2.48620536152949e-06, + "loss": 0.25734084844589233, + "step": 3167 + }, + { + 
"epoch": 1.5654269121462994, + "grad_norm": 1.2954820564672134, + "learning_rate": 2.4808163385875226e-06, + "loss": 0.24831843376159668, + "step": 3168 + }, + { + "epoch": 1.5659211664401336, + "grad_norm": 1.3356720372460569, + "learning_rate": 2.4754323353451284e-06, + "loss": 0.2389685958623886, + "step": 3169 + }, + { + "epoch": 1.5664154207339678, + "grad_norm": 1.33182477221405, + "learning_rate": 2.4700533553965946e-06, + "loss": 0.24750663340091705, + "step": 3170 + }, + { + "epoch": 1.5669096750278018, + "grad_norm": 1.4158946259185428, + "learning_rate": 2.4646794023328525e-06, + "loss": 0.2689003348350525, + "step": 3171 + }, + { + "epoch": 1.567403929321636, + "grad_norm": 1.32371836304635, + "learning_rate": 2.45931047974147e-06, + "loss": 0.2574145197868347, + "step": 3172 + }, + { + "epoch": 1.5678981836154702, + "grad_norm": 1.5403400973166155, + "learning_rate": 2.4539465912066706e-06, + "loss": 0.2586211562156677, + "step": 3173 + }, + { + "epoch": 1.5683924379093044, + "grad_norm": 1.340393455505496, + "learning_rate": 2.4485877403093095e-06, + "loss": 0.26383671164512634, + "step": 3174 + }, + { + "epoch": 1.5688866922031384, + "grad_norm": 1.2806590186816509, + "learning_rate": 2.4432339306268736e-06, + "loss": 0.28196123242378235, + "step": 3175 + }, + { + "epoch": 1.5693809464969726, + "grad_norm": 1.4692337066995136, + "learning_rate": 2.4378851657334923e-06, + "loss": 0.2736835181713104, + "step": 3176 + }, + { + "epoch": 1.5698752007908068, + "grad_norm": 1.3442483287569258, + "learning_rate": 2.4325414491999255e-06, + "loss": 0.2316201627254486, + "step": 3177 + }, + { + "epoch": 1.570369455084641, + "grad_norm": 1.363437265904272, + "learning_rate": 2.427202784593562e-06, + "loss": 0.23955810070037842, + "step": 3178 + }, + { + "epoch": 1.5708637093784752, + "grad_norm": 1.4240865879172782, + "learning_rate": 2.4218691754784162e-06, + "loss": 0.263042151927948, + "step": 3179 + }, + { + "epoch": 1.5713579636723094, + "grad_norm": 
1.3283544396978941, + "learning_rate": 2.4165406254151312e-06, + "loss": 0.25570976734161377, + "step": 3180 + }, + { + "epoch": 1.5718522179661436, + "grad_norm": 1.3508561425487733, + "learning_rate": 2.4112171379609696e-06, + "loss": 0.2503488063812256, + "step": 3181 + }, + { + "epoch": 1.5723464722599778, + "grad_norm": 1.2731349274514334, + "learning_rate": 2.40589871666982e-06, + "loss": 0.21815824508666992, + "step": 3182 + }, + { + "epoch": 1.572840726553812, + "grad_norm": 1.4354076907799536, + "learning_rate": 2.400585365092177e-06, + "loss": 0.23936739563941956, + "step": 3183 + }, + { + "epoch": 1.5733349808476462, + "grad_norm": 1.2459112031686363, + "learning_rate": 2.3952770867751595e-06, + "loss": 0.2618086636066437, + "step": 3184 + }, + { + "epoch": 1.5738292351414804, + "grad_norm": 1.477109441631464, + "learning_rate": 2.3899738852625065e-06, + "loss": 0.2852020263671875, + "step": 3185 + }, + { + "epoch": 1.5743234894353144, + "grad_norm": 1.4364121007652697, + "learning_rate": 2.3846757640945505e-06, + "loss": 0.28860047459602356, + "step": 3186 + }, + { + "epoch": 1.5748177437291486, + "grad_norm": 1.2738328733534112, + "learning_rate": 2.3793827268082446e-06, + "loss": 0.2397383451461792, + "step": 3187 + }, + { + "epoch": 1.5753119980229828, + "grad_norm": 1.3548543446694599, + "learning_rate": 2.374094776937145e-06, + "loss": 0.25204962491989136, + "step": 3188 + }, + { + "epoch": 1.575806252316817, + "grad_norm": 1.2908932541507008, + "learning_rate": 2.368811918011411e-06, + "loss": 0.21216189861297607, + "step": 3189 + }, + { + "epoch": 1.576300506610651, + "grad_norm": 1.4719289728075926, + "learning_rate": 2.363534153557805e-06, + "loss": 0.2647620737552643, + "step": 3190 + }, + { + "epoch": 1.5767947609044852, + "grad_norm": 1.4154428976481128, + "learning_rate": 2.358261487099688e-06, + "loss": 0.3079666793346405, + "step": 3191 + }, + { + "epoch": 1.5772890151983194, + "grad_norm": 1.1847060614906242, + "learning_rate": 
2.352993922157013e-06, + "loss": 0.22961711883544922, + "step": 3192 + }, + { + "epoch": 1.5777832694921536, + "grad_norm": 1.5460794294977342, + "learning_rate": 2.347731462246331e-06, + "loss": 0.2657305598258972, + "step": 3193 + }, + { + "epoch": 1.5782775237859878, + "grad_norm": 1.2622234684788671, + "learning_rate": 2.3424741108807914e-06, + "loss": 0.224237859249115, + "step": 3194 + }, + { + "epoch": 1.578771778079822, + "grad_norm": 1.4036688905605132, + "learning_rate": 2.337221871570121e-06, + "loss": 0.26459985971450806, + "step": 3195 + }, + { + "epoch": 1.5792660323736563, + "grad_norm": 1.4237290486306964, + "learning_rate": 2.331974747820641e-06, + "loss": 0.25391027331352234, + "step": 3196 + }, + { + "epoch": 1.5797602866674905, + "grad_norm": 1.3683418214908574, + "learning_rate": 2.326732743135256e-06, + "loss": 0.25822141766548157, + "step": 3197 + }, + { + "epoch": 1.5802545409613247, + "grad_norm": 1.3569651988075904, + "learning_rate": 2.3214958610134554e-06, + "loss": 0.25140073895454407, + "step": 3198 + }, + { + "epoch": 1.5807487952551589, + "grad_norm": 1.280802230226295, + "learning_rate": 2.3162641049513035e-06, + "loss": 0.2550397515296936, + "step": 3199 + }, + { + "epoch": 1.581243049548993, + "grad_norm": 1.3770416210337255, + "learning_rate": 2.3110374784414526e-06, + "loss": 0.2648996412754059, + "step": 3200 + }, + { + "epoch": 1.5817373038428273, + "grad_norm": 1.285627272529884, + "learning_rate": 2.3058159849731134e-06, + "loss": 0.235626682639122, + "step": 3201 + }, + { + "epoch": 1.5822315581366613, + "grad_norm": 1.354562155318599, + "learning_rate": 2.3005996280320873e-06, + "loss": 0.24930328130722046, + "step": 3202 + }, + { + "epoch": 1.5827258124304955, + "grad_norm": 1.299026803187305, + "learning_rate": 2.2953884111007428e-06, + "loss": 0.23712117969989777, + "step": 3203 + }, + { + "epoch": 1.5832200667243297, + "grad_norm": 1.4407443338733177, + "learning_rate": 2.290182337658007e-06, + "loss": 
0.2504096031188965, + "step": 3204 + }, + { + "epoch": 1.583714321018164, + "grad_norm": 1.345261370550347, + "learning_rate": 2.2849814111793823e-06, + "loss": 0.2218465358018875, + "step": 3205 + }, + { + "epoch": 1.5842085753119979, + "grad_norm": 1.3818182639369938, + "learning_rate": 2.279785635136933e-06, + "loss": 0.2653011977672577, + "step": 3206 + }, + { + "epoch": 1.584702829605832, + "grad_norm": 1.521658991035551, + "learning_rate": 2.2745950129992853e-06, + "loss": 0.27551597356796265, + "step": 3207 + }, + { + "epoch": 1.5851970838996663, + "grad_norm": 1.2816405701256748, + "learning_rate": 2.2694095482316247e-06, + "loss": 0.21494519710540771, + "step": 3208 + }, + { + "epoch": 1.5856913381935005, + "grad_norm": 1.2804333364342155, + "learning_rate": 2.2642292442956925e-06, + "loss": 0.2517405152320862, + "step": 3209 + }, + { + "epoch": 1.5861855924873347, + "grad_norm": 1.365131298274178, + "learning_rate": 2.259054104649786e-06, + "loss": 0.25777050852775574, + "step": 3210 + }, + { + "epoch": 1.586679846781169, + "grad_norm": 1.3722239172040558, + "learning_rate": 2.2538841327487582e-06, + "loss": 0.25914469361305237, + "step": 3211 + }, + { + "epoch": 1.5871741010750031, + "grad_norm": 1.3924091851436682, + "learning_rate": 2.2487193320440017e-06, + "loss": 0.23877818882465363, + "step": 3212 + }, + { + "epoch": 1.5876683553688373, + "grad_norm": 1.2757007530985867, + "learning_rate": 2.2435597059834635e-06, + "loss": 0.2226967066526413, + "step": 3213 + }, + { + "epoch": 1.5881626096626715, + "grad_norm": 1.400079876174728, + "learning_rate": 2.2384052580116465e-06, + "loss": 0.28768399357795715, + "step": 3214 + }, + { + "epoch": 1.5886568639565057, + "grad_norm": 1.3700126786923876, + "learning_rate": 2.233255991569575e-06, + "loss": 0.2563883662223816, + "step": 3215 + }, + { + "epoch": 1.58915111825034, + "grad_norm": 1.3688176323163237, + "learning_rate": 2.2281119100948322e-06, + "loss": 0.2595394551753998, + "step": 3216 + }, + { + 
"epoch": 1.589645372544174, + "grad_norm": 1.2924408591101029, + "learning_rate": 2.2229730170215324e-06, + "loss": 0.2354460060596466, + "step": 3217 + }, + { + "epoch": 1.5901396268380081, + "grad_norm": 1.3015321221613778, + "learning_rate": 2.2178393157803225e-06, + "loss": 0.2397463619709015, + "step": 3218 + }, + { + "epoch": 1.5906338811318423, + "grad_norm": 1.5213512082778142, + "learning_rate": 2.212710809798393e-06, + "loss": 0.3304588794708252, + "step": 3219 + }, + { + "epoch": 1.5911281354256765, + "grad_norm": 1.2850321771259765, + "learning_rate": 2.207587502499464e-06, + "loss": 0.23891952633857727, + "step": 3220 + }, + { + "epoch": 1.5916223897195105, + "grad_norm": 1.3962733600240735, + "learning_rate": 2.2024693973037747e-06, + "loss": 0.2544774115085602, + "step": 3221 + }, + { + "epoch": 1.5921166440133447, + "grad_norm": 1.4193118785950918, + "learning_rate": 2.1973564976281003e-06, + "loss": 0.2620859444141388, + "step": 3222 + }, + { + "epoch": 1.592610898307179, + "grad_norm": 1.2794541555838774, + "learning_rate": 2.192248806885747e-06, + "loss": 0.22541281580924988, + "step": 3223 + }, + { + "epoch": 1.5931051526010132, + "grad_norm": 1.2886654382919192, + "learning_rate": 2.187146328486529e-06, + "loss": 0.23454351723194122, + "step": 3224 + }, + { + "epoch": 1.5935994068948474, + "grad_norm": 1.3236984572567387, + "learning_rate": 2.18204906583679e-06, + "loss": 0.24848732352256775, + "step": 3225 + }, + { + "epoch": 1.5940936611886816, + "grad_norm": 1.2049251354008288, + "learning_rate": 2.176957022339389e-06, + "loss": 0.21949590742588043, + "step": 3226 + }, + { + "epoch": 1.5945879154825158, + "grad_norm": 1.3436660246382202, + "learning_rate": 2.171870201393703e-06, + "loss": 0.27300944924354553, + "step": 3227 + }, + { + "epoch": 1.59508216977635, + "grad_norm": 1.3272049247129862, + "learning_rate": 2.16678860639562e-06, + "loss": 0.23850613832473755, + "step": 3228 + }, + { + "epoch": 1.5955764240701842, + "grad_norm": 
1.4611172116234663, + "learning_rate": 2.1617122407375424e-06, + "loss": 0.2728792428970337, + "step": 3229 + }, + { + "epoch": 1.5960706783640184, + "grad_norm": 1.4623582491499052, + "learning_rate": 2.1566411078083726e-06, + "loss": 0.2321755588054657, + "step": 3230 + }, + { + "epoch": 1.5965649326578526, + "grad_norm": 1.5111460470858884, + "learning_rate": 2.1515752109935374e-06, + "loss": 0.30118101835250854, + "step": 3231 + }, + { + "epoch": 1.5970591869516868, + "grad_norm": 1.2041348970592753, + "learning_rate": 2.1465145536749475e-06, + "loss": 0.22317390143871307, + "step": 3232 + }, + { + "epoch": 1.5975534412455208, + "grad_norm": 1.4530812438401597, + "learning_rate": 2.141459139231029e-06, + "loss": 0.2906285524368286, + "step": 3233 + }, + { + "epoch": 1.598047695539355, + "grad_norm": 1.3996891865587815, + "learning_rate": 2.136408971036704e-06, + "loss": 0.24645069241523743, + "step": 3234 + }, + { + "epoch": 1.5985419498331892, + "grad_norm": 1.4725365119055005, + "learning_rate": 2.1313640524633927e-06, + "loss": 0.26764121651649475, + "step": 3235 + }, + { + "epoch": 1.5990362041270234, + "grad_norm": 1.4093554565168636, + "learning_rate": 2.126324386879012e-06, + "loss": 0.2811397910118103, + "step": 3236 + }, + { + "epoch": 1.5995304584208574, + "grad_norm": 1.5104838755570678, + "learning_rate": 2.121289977647971e-06, + "loss": 0.254316508769989, + "step": 3237 + }, + { + "epoch": 1.6000247127146916, + "grad_norm": 1.378432417546232, + "learning_rate": 2.1162608281311636e-06, + "loss": 0.2479352205991745, + "step": 3238 + }, + { + "epoch": 1.6005189670085258, + "grad_norm": 1.200711868039053, + "learning_rate": 2.1112369416859847e-06, + "loss": 0.22767537832260132, + "step": 3239 + }, + { + "epoch": 1.60101322130236, + "grad_norm": 1.3194506024522585, + "learning_rate": 2.106218321666309e-06, + "loss": 0.24286411702632904, + "step": 3240 + }, + { + "epoch": 1.6015074755961942, + "grad_norm": 1.4143801874217299, + "learning_rate": 
2.1012049714224914e-06, + "loss": 0.22960595786571503, + "step": 3241 + }, + { + "epoch": 1.6020017298900284, + "grad_norm": 1.3710437918045983, + "learning_rate": 2.0961968943013742e-06, + "loss": 0.2448965162038803, + "step": 3242 + }, + { + "epoch": 1.6024959841838626, + "grad_norm": 1.3544107087641921, + "learning_rate": 2.0911940936462794e-06, + "loss": 0.23486846685409546, + "step": 3243 + }, + { + "epoch": 1.6029902384776968, + "grad_norm": 1.576868207611872, + "learning_rate": 2.0861965727970045e-06, + "loss": 0.2470572590827942, + "step": 3244 + }, + { + "epoch": 1.603484492771531, + "grad_norm": 1.3371604672673962, + "learning_rate": 2.0812043350898226e-06, + "loss": 0.283765971660614, + "step": 3245 + }, + { + "epoch": 1.6039787470653653, + "grad_norm": 1.3089763025714083, + "learning_rate": 2.076217383857484e-06, + "loss": 0.24943199753761292, + "step": 3246 + }, + { + "epoch": 1.6044730013591995, + "grad_norm": 1.2872721191375163, + "learning_rate": 2.0712357224291966e-06, + "loss": 0.22150146961212158, + "step": 3247 + }, + { + "epoch": 1.6049672556530334, + "grad_norm": 1.3204310548386595, + "learning_rate": 2.0662593541306563e-06, + "loss": 0.2610163390636444, + "step": 3248 + }, + { + "epoch": 1.6054615099468676, + "grad_norm": 1.243779369506435, + "learning_rate": 2.0612882822840154e-06, + "loss": 0.22789397835731506, + "step": 3249 + }, + { + "epoch": 1.6059557642407019, + "grad_norm": 1.3699765130937176, + "learning_rate": 2.056322510207882e-06, + "loss": 0.22956407070159912, + "step": 3250 + }, + { + "epoch": 1.606450018534536, + "grad_norm": 1.3752485526796745, + "learning_rate": 2.051362041217341e-06, + "loss": 0.2579299509525299, + "step": 3251 + }, + { + "epoch": 1.60694427282837, + "grad_norm": 1.449594870075983, + "learning_rate": 2.046406878623929e-06, + "loss": 0.24655218422412872, + "step": 3252 + }, + { + "epoch": 1.6074385271222043, + "grad_norm": 1.8413073723455704, + "learning_rate": 2.0414570257356415e-06, + "loss": 
0.2325882464647293, + "step": 3253 + }, + { + "epoch": 1.6079327814160385, + "grad_norm": 1.3704743037638702, + "learning_rate": 2.0365124858569294e-06, + "loss": 0.2678581476211548, + "step": 3254 + }, + { + "epoch": 1.6084270357098727, + "grad_norm": 1.3329052595945479, + "learning_rate": 2.0315732622886976e-06, + "loss": 0.23200136423110962, + "step": 3255 + }, + { + "epoch": 1.6089212900037069, + "grad_norm": 1.3902434854443921, + "learning_rate": 2.0266393583283015e-06, + "loss": 0.24957536160945892, + "step": 3256 + }, + { + "epoch": 1.609415544297541, + "grad_norm": 1.3655551679458238, + "learning_rate": 2.0217107772695467e-06, + "loss": 0.2506657540798187, + "step": 3257 + }, + { + "epoch": 1.6099097985913753, + "grad_norm": 1.3749666602598227, + "learning_rate": 2.0167875224026788e-06, + "loss": 0.22255182266235352, + "step": 3258 + }, + { + "epoch": 1.6104040528852095, + "grad_norm": 1.222166534445823, + "learning_rate": 2.011869597014392e-06, + "loss": 0.2489611655473709, + "step": 3259 + }, + { + "epoch": 1.6108983071790437, + "grad_norm": 1.3283456479938487, + "learning_rate": 2.0069570043878305e-06, + "loss": 0.24808533489704132, + "step": 3260 + }, + { + "epoch": 1.611392561472878, + "grad_norm": 1.3963788000978605, + "learning_rate": 2.0020497478025635e-06, + "loss": 0.24013441801071167, + "step": 3261 + }, + { + "epoch": 1.6118868157667121, + "grad_norm": 1.4500389435817727, + "learning_rate": 1.997147830534608e-06, + "loss": 0.2813841998577118, + "step": 3262 + }, + { + "epoch": 1.612381070060546, + "grad_norm": 1.4817798618081903, + "learning_rate": 1.9922512558564154e-06, + "loss": 0.23727375268936157, + "step": 3263 + }, + { + "epoch": 1.6128753243543803, + "grad_norm": 1.3341145171932982, + "learning_rate": 1.9873600270368664e-06, + "loss": 0.2341655194759369, + "step": 3264 + }, + { + "epoch": 1.6133695786482145, + "grad_norm": 1.4911949653625025, + "learning_rate": 1.9824741473412768e-06, + "loss": 0.32069963216781616, + "step": 3265 + }, + 
{ + "epoch": 1.6138638329420487, + "grad_norm": 1.4329702924332965, + "learning_rate": 1.977593620031393e-06, + "loss": 0.2414681762456894, + "step": 3266 + }, + { + "epoch": 1.6143580872358827, + "grad_norm": 1.4261430753271709, + "learning_rate": 1.9727184483653793e-06, + "loss": 0.25517842173576355, + "step": 3267 + }, + { + "epoch": 1.614852341529717, + "grad_norm": 1.4664579571771421, + "learning_rate": 1.967848635597831e-06, + "loss": 0.28264889121055603, + "step": 3268 + }, + { + "epoch": 1.6153465958235511, + "grad_norm": 1.2722226456356633, + "learning_rate": 1.962984184979774e-06, + "loss": 0.24543075263500214, + "step": 3269 + }, + { + "epoch": 1.6158408501173853, + "grad_norm": 1.3971489540759634, + "learning_rate": 1.9581250997586366e-06, + "loss": 0.2770763039588928, + "step": 3270 + }, + { + "epoch": 1.6163351044112195, + "grad_norm": 1.362601366326608, + "learning_rate": 1.953271383178278e-06, + "loss": 0.2521423101425171, + "step": 3271 + }, + { + "epoch": 1.6168293587050537, + "grad_norm": 1.262726405313237, + "learning_rate": 1.9484230384789702e-06, + "loss": 0.2402455359697342, + "step": 3272 + }, + { + "epoch": 1.617323612998888, + "grad_norm": 1.4929089203163604, + "learning_rate": 1.9435800688974005e-06, + "loss": 0.2947021424770355, + "step": 3273 + }, + { + "epoch": 1.6178178672927221, + "grad_norm": 1.2382587228414774, + "learning_rate": 1.938742477666663e-06, + "loss": 0.22238701581954956, + "step": 3274 + }, + { + "epoch": 1.6183121215865564, + "grad_norm": 1.2835510888376274, + "learning_rate": 1.933910268016269e-06, + "loss": 0.25475019216537476, + "step": 3275 + }, + { + "epoch": 1.6188063758803906, + "grad_norm": 1.2545564646453307, + "learning_rate": 1.929083443172125e-06, + "loss": 0.2316315472126007, + "step": 3276 + }, + { + "epoch": 1.6193006301742248, + "grad_norm": 1.2739392933893041, + "learning_rate": 1.9242620063565598e-06, + "loss": 0.24977952241897583, + "step": 3277 + }, + { + "epoch": 1.619794884468059, + "grad_norm": 
1.3712132397422443, + "learning_rate": 1.9194459607882887e-06, + "loss": 0.24006152153015137, + "step": 3278 + }, + { + "epoch": 1.620289138761893, + "grad_norm": 1.2866259343493134, + "learning_rate": 1.9146353096824366e-06, + "loss": 0.26050522923469543, + "step": 3279 + }, + { + "epoch": 1.6207833930557272, + "grad_norm": 1.338449999730035, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2698773443698883, + "step": 3280 + }, + { + "epoch": 1.6212776473495614, + "grad_norm": 1.480404977138073, + "learning_rate": 1.9050302037004765e-06, + "loss": 0.2627784013748169, + "step": 3281 + }, + { + "epoch": 1.6217719016433956, + "grad_norm": 1.3335627547093958, + "learning_rate": 1.900235755236599e-06, + "loss": 0.24261148273944855, + "step": 3282 + }, + { + "epoch": 1.6222661559372296, + "grad_norm": 1.347149973540751, + "learning_rate": 1.8954467140596023e-06, + "loss": 0.24689635634422302, + "step": 3283 + }, + { + "epoch": 1.6227604102310638, + "grad_norm": 1.4586477344669697, + "learning_rate": 1.890663083366574e-06, + "loss": 0.2885867953300476, + "step": 3284 + }, + { + "epoch": 1.623254664524898, + "grad_norm": 1.2981242679817548, + "learning_rate": 1.8858848663510066e-06, + "loss": 0.2624407112598419, + "step": 3285 + }, + { + "epoch": 1.6237489188187322, + "grad_norm": 1.4544775837390882, + "learning_rate": 1.881112066202767e-06, + "loss": 0.27705928683280945, + "step": 3286 + }, + { + "epoch": 1.6242431731125664, + "grad_norm": 1.4465119903360202, + "learning_rate": 1.8763446861081058e-06, + "loss": 0.26406094431877136, + "step": 3287 + }, + { + "epoch": 1.6247374274064006, + "grad_norm": 1.3239739188563808, + "learning_rate": 1.8715827292496557e-06, + "loss": 0.26495790481567383, + "step": 3288 + }, + { + "epoch": 1.6252316817002348, + "grad_norm": 1.419298583557058, + "learning_rate": 1.8668261988064406e-06, + "loss": 0.24995195865631104, + "step": 3289 + }, + { + "epoch": 1.625725935994069, + "grad_norm": 1.4058286500391235, + "learning_rate": 
1.8620750979538437e-06, + "loss": 0.23043034970760345, + "step": 3290 + }, + { + "epoch": 1.6262201902879032, + "grad_norm": 1.3959905154788135, + "learning_rate": 1.8573294298636334e-06, + "loss": 0.2590731978416443, + "step": 3291 + }, + { + "epoch": 1.6267144445817374, + "grad_norm": 1.3919450960931963, + "learning_rate": 1.8525891977039557e-06, + "loss": 0.24246811866760254, + "step": 3292 + }, + { + "epoch": 1.6272086988755716, + "grad_norm": 1.2790623939923147, + "learning_rate": 1.847854404639311e-06, + "loss": 0.2386825680732727, + "step": 3293 + }, + { + "epoch": 1.6277029531694056, + "grad_norm": 1.3168324939527787, + "learning_rate": 1.843125053830588e-06, + "loss": 0.2243885099887848, + "step": 3294 + }, + { + "epoch": 1.6281972074632398, + "grad_norm": 1.264397606173487, + "learning_rate": 1.838401148435035e-06, + "loss": 0.24984796345233917, + "step": 3295 + }, + { + "epoch": 1.628691461757074, + "grad_norm": 1.3015406971863621, + "learning_rate": 1.8336826916062568e-06, + "loss": 0.22784638404846191, + "step": 3296 + }, + { + "epoch": 1.6291857160509082, + "grad_norm": 1.4178841831435534, + "learning_rate": 1.828969686494232e-06, + "loss": 0.24812597036361694, + "step": 3297 + }, + { + "epoch": 1.6296799703447422, + "grad_norm": 1.2132930880582795, + "learning_rate": 1.8242621362452939e-06, + "loss": 0.234031543135643, + "step": 3298 + }, + { + "epoch": 1.6301742246385764, + "grad_norm": 1.251471335677166, + "learning_rate": 1.8195600440021377e-06, + "loss": 0.22455371916294098, + "step": 3299 + }, + { + "epoch": 1.6306684789324106, + "grad_norm": 1.2488121980955387, + "learning_rate": 1.8148634129038113e-06, + "loss": 0.22605910897254944, + "step": 3300 + }, + { + "epoch": 1.6311627332262448, + "grad_norm": 1.3484568896035969, + "learning_rate": 1.8101722460857184e-06, + "loss": 0.2527684271335602, + "step": 3301 + }, + { + "epoch": 1.631656987520079, + "grad_norm": 1.355365003110194, + "learning_rate": 1.8054865466796167e-06, + "loss": 
0.24625766277313232, + "step": 3302 + }, + { + "epoch": 1.6321512418139132, + "grad_norm": 1.7061568076136007, + "learning_rate": 1.8008063178136125e-06, + "loss": 0.31236231327056885, + "step": 3303 + }, + { + "epoch": 1.6326454961077475, + "grad_norm": 1.3433390649211776, + "learning_rate": 1.7961315626121566e-06, + "loss": 0.21256005764007568, + "step": 3304 + }, + { + "epoch": 1.6331397504015817, + "grad_norm": 1.5039173087965194, + "learning_rate": 1.7914622841960482e-06, + "loss": 0.25238949060440063, + "step": 3305 + }, + { + "epoch": 1.6336340046954159, + "grad_norm": 1.3709723014330413, + "learning_rate": 1.7867984856824382e-06, + "loss": 0.29630619287490845, + "step": 3306 + }, + { + "epoch": 1.63412825898925, + "grad_norm": 1.3891654533842075, + "learning_rate": 1.782140170184804e-06, + "loss": 0.26159363985061646, + "step": 3307 + }, + { + "epoch": 1.6346225132830843, + "grad_norm": 1.2884457367333761, + "learning_rate": 1.7774873408129733e-06, + "loss": 0.22361448407173157, + "step": 3308 + }, + { + "epoch": 1.6351167675769185, + "grad_norm": 1.410142665529872, + "learning_rate": 1.7728400006731083e-06, + "loss": 0.23890942335128784, + "step": 3309 + }, + { + "epoch": 1.6356110218707525, + "grad_norm": 1.3147986477314286, + "learning_rate": 1.7681981528677073e-06, + "loss": 0.23067504167556763, + "step": 3310 + }, + { + "epoch": 1.6361052761645867, + "grad_norm": 1.4202307068972662, + "learning_rate": 1.7635618004956012e-06, + "loss": 0.24790561199188232, + "step": 3311 + }, + { + "epoch": 1.6365995304584209, + "grad_norm": 1.3183461895569366, + "learning_rate": 1.7589309466519556e-06, + "loss": 0.2590476870536804, + "step": 3312 + }, + { + "epoch": 1.637093784752255, + "grad_norm": 1.4033807602679105, + "learning_rate": 1.754305594428254e-06, + "loss": 0.26833316683769226, + "step": 3313 + }, + { + "epoch": 1.637588039046089, + "grad_norm": 1.2949590395956057, + "learning_rate": 1.749685746912323e-06, + "loss": 0.23390671610832214, + "step": 3314 + }, 
+ { + "epoch": 1.6380822933399233, + "grad_norm": 1.350070481785481, + "learning_rate": 1.7450714071883079e-06, + "loss": 0.2760172188282013, + "step": 3315 + }, + { + "epoch": 1.6385765476337575, + "grad_norm": 1.387338184553767, + "learning_rate": 1.7404625783366703e-06, + "loss": 0.255672812461853, + "step": 3316 + }, + { + "epoch": 1.6390708019275917, + "grad_norm": 1.4073549622144716, + "learning_rate": 1.7358592634342008e-06, + "loss": 0.26336947083473206, + "step": 3317 + }, + { + "epoch": 1.639565056221426, + "grad_norm": 1.2609217918610456, + "learning_rate": 1.7312614655540071e-06, + "loss": 0.2308199107646942, + "step": 3318 + }, + { + "epoch": 1.64005931051526, + "grad_norm": 1.335188741822115, + "learning_rate": 1.7266691877655129e-06, + "loss": 0.24762676656246185, + "step": 3319 + }, + { + "epoch": 1.6405535648090943, + "grad_norm": 1.3287358421539026, + "learning_rate": 1.7220824331344577e-06, + "loss": 0.2175157219171524, + "step": 3320 + }, + { + "epoch": 1.6410478191029285, + "grad_norm": 1.3635707435478155, + "learning_rate": 1.7175012047228956e-06, + "loss": 0.24319039285182953, + "step": 3321 + }, + { + "epoch": 1.6415420733967627, + "grad_norm": 1.2272267263054326, + "learning_rate": 1.7129255055891813e-06, + "loss": 0.21708521246910095, + "step": 3322 + }, + { + "epoch": 1.642036327690597, + "grad_norm": 1.4404881849035673, + "learning_rate": 1.7083553387879969e-06, + "loss": 0.28576910495758057, + "step": 3323 + }, + { + "epoch": 1.6425305819844311, + "grad_norm": 1.3120467826579518, + "learning_rate": 1.703790707370313e-06, + "loss": 0.2664312720298767, + "step": 3324 + }, + { + "epoch": 1.6430248362782651, + "grad_norm": 1.5950926505285568, + "learning_rate": 1.6992316143834142e-06, + "loss": 0.23930951952934265, + "step": 3325 + }, + { + "epoch": 1.6435190905720993, + "grad_norm": 1.3985303284465023, + "learning_rate": 1.694678062870886e-06, + "loss": 0.2741955518722534, + "step": 3326 + }, + { + "epoch": 1.6440133448659335, + 
"grad_norm": 1.2830935776841221, + "learning_rate": 1.6901300558726142e-06, + "loss": 0.25177690386772156, + "step": 3327 + }, + { + "epoch": 1.6445075991597677, + "grad_norm": 1.4111945712412088, + "learning_rate": 1.6855875964247837e-06, + "loss": 0.26517611742019653, + "step": 3328 + }, + { + "epoch": 1.6450018534536017, + "grad_norm": 1.227994601145186, + "learning_rate": 1.6810506875598776e-06, + "loss": 0.2294573187828064, + "step": 3329 + }, + { + "epoch": 1.645496107747436, + "grad_norm": 1.3101987526620804, + "learning_rate": 1.6765193323066653e-06, + "loss": 0.23062998056411743, + "step": 3330 + }, + { + "epoch": 1.6459903620412701, + "grad_norm": 1.4687005380243534, + "learning_rate": 1.6719935336902205e-06, + "loss": 0.3047422468662262, + "step": 3331 + }, + { + "epoch": 1.6464846163351043, + "grad_norm": 1.4214345840675306, + "learning_rate": 1.6674732947319017e-06, + "loss": 0.2715694308280945, + "step": 3332 + }, + { + "epoch": 1.6469788706289386, + "grad_norm": 1.3486732362780178, + "learning_rate": 1.6629586184493519e-06, + "loss": 0.20359721779823303, + "step": 3333 + }, + { + "epoch": 1.6474731249227728, + "grad_norm": 1.256842666883273, + "learning_rate": 1.6584495078565045e-06, + "loss": 0.20083262026309967, + "step": 3334 + }, + { + "epoch": 1.647967379216607, + "grad_norm": 1.2824441486710174, + "learning_rate": 1.6539459659635848e-06, + "loss": 0.2274707555770874, + "step": 3335 + }, + { + "epoch": 1.6484616335104412, + "grad_norm": 1.4170790489583633, + "learning_rate": 1.6494479957770847e-06, + "loss": 0.2654137909412384, + "step": 3336 + }, + { + "epoch": 1.6489558878042754, + "grad_norm": 1.2207871831065553, + "learning_rate": 1.644955600299788e-06, + "loss": 0.24672716856002808, + "step": 3337 + }, + { + "epoch": 1.6494501420981096, + "grad_norm": 2.7206661248050494, + "learning_rate": 1.640468782530753e-06, + "loss": 0.21563802659511566, + "step": 3338 + }, + { + "epoch": 1.6499443963919438, + "grad_norm": 1.2772497258385302, + 
"learning_rate": 1.6359875454653151e-06, + "loss": 0.22986169159412384, + "step": 3339 + }, + { + "epoch": 1.650438650685778, + "grad_norm": 1.1914212857874291, + "learning_rate": 1.6315118920950857e-06, + "loss": 0.22981731593608856, + "step": 3340 + }, + { + "epoch": 1.650932904979612, + "grad_norm": 1.423180347857553, + "learning_rate": 1.6270418254079478e-06, + "loss": 0.25922536849975586, + "step": 3341 + }, + { + "epoch": 1.6514271592734462, + "grad_norm": 1.3808711162643625, + "learning_rate": 1.6225773483880503e-06, + "loss": 0.23273468017578125, + "step": 3342 + }, + { + "epoch": 1.6519214135672804, + "grad_norm": 1.3019728240659525, + "learning_rate": 1.6181184640158165e-06, + "loss": 0.22988896071910858, + "step": 3343 + }, + { + "epoch": 1.6524156678611144, + "grad_norm": 1.3674976753844925, + "learning_rate": 1.6136651752679333e-06, + "loss": 0.2628646790981293, + "step": 3344 + }, + { + "epoch": 1.6529099221549486, + "grad_norm": 1.3498513177046836, + "learning_rate": 1.6092174851173526e-06, + "loss": 0.24670086801052094, + "step": 3345 + }, + { + "epoch": 1.6534041764487828, + "grad_norm": 1.3175919767027275, + "learning_rate": 1.6047753965332902e-06, + "loss": 0.27845436334609985, + "step": 3346 + }, + { + "epoch": 1.653898430742617, + "grad_norm": 1.30200656487082, + "learning_rate": 1.6003389124812185e-06, + "loss": 0.25297483801841736, + "step": 3347 + }, + { + "epoch": 1.6543926850364512, + "grad_norm": 1.237195636484559, + "learning_rate": 1.595908035922873e-06, + "loss": 0.18876859545707703, + "step": 3348 + }, + { + "epoch": 1.6548869393302854, + "grad_norm": 1.3417621492525376, + "learning_rate": 1.591482769816246e-06, + "loss": 0.23852673172950745, + "step": 3349 + }, + { + "epoch": 1.6553811936241196, + "grad_norm": 1.3350614987774176, + "learning_rate": 1.587063117115576e-06, + "loss": 0.2569701373577118, + "step": 3350 + }, + { + "epoch": 1.6558754479179538, + "grad_norm": 1.354350083762125, + "learning_rate": 1.582649080771359e-06, + 
"loss": 0.29305699467658997, + "step": 3351 + }, + { + "epoch": 1.656369702211788, + "grad_norm": 1.42534989112271, + "learning_rate": 1.5782406637303527e-06, + "loss": 0.28942832350730896, + "step": 3352 + }, + { + "epoch": 1.6568639565056222, + "grad_norm": 1.351062882636418, + "learning_rate": 1.5738378689355439e-06, + "loss": 0.27491068840026855, + "step": 3353 + }, + { + "epoch": 1.6573582107994564, + "grad_norm": 1.4736732865815314, + "learning_rate": 1.569440699326179e-06, + "loss": 0.26730844378471375, + "step": 3354 + }, + { + "epoch": 1.6578524650932907, + "grad_norm": 1.3194299490413177, + "learning_rate": 1.5650491578377458e-06, + "loss": 0.23610982298851013, + "step": 3355 + }, + { + "epoch": 1.6583467193871246, + "grad_norm": 1.5894671595119023, + "learning_rate": 1.5606632474019734e-06, + "loss": 0.26817262172698975, + "step": 3356 + }, + { + "epoch": 1.6588409736809588, + "grad_norm": 1.4847304906222882, + "learning_rate": 1.556282970946833e-06, + "loss": 0.2403341382741928, + "step": 3357 + }, + { + "epoch": 1.659335227974793, + "grad_norm": 1.4109665373138245, + "learning_rate": 1.5519083313965378e-06, + "loss": 0.24433058500289917, + "step": 3358 + }, + { + "epoch": 1.6598294822686273, + "grad_norm": 1.2685951523616033, + "learning_rate": 1.5475393316715282e-06, + "loss": 0.2526702582836151, + "step": 3359 + }, + { + "epoch": 1.6603237365624612, + "grad_norm": 1.3373930264060108, + "learning_rate": 1.543175974688491e-06, + "loss": 0.24032334983348846, + "step": 3360 + }, + { + "epoch": 1.6608179908562954, + "grad_norm": 1.3759465001084996, + "learning_rate": 1.5388182633603433e-06, + "loss": 0.27770349383354187, + "step": 3361 + }, + { + "epoch": 1.6613122451501297, + "grad_norm": 1.5590715119269358, + "learning_rate": 1.534466200596224e-06, + "loss": 0.26002752780914307, + "step": 3362 + }, + { + "epoch": 1.6618064994439639, + "grad_norm": 1.867324678142589, + "learning_rate": 1.5301197893015129e-06, + "loss": 0.2707037329673767, + "step": 3363 
+ }, + { + "epoch": 1.662300753737798, + "grad_norm": 1.3300911116600942, + "learning_rate": 1.52577903237781e-06, + "loss": 0.27249252796173096, + "step": 3364 + }, + { + "epoch": 1.6627950080316323, + "grad_norm": 1.341030721831506, + "learning_rate": 1.5214439327229425e-06, + "loss": 0.22495020925998688, + "step": 3365 + }, + { + "epoch": 1.6632892623254665, + "grad_norm": 1.4580410293752506, + "learning_rate": 1.5171144932309622e-06, + "loss": 0.23561973869800568, + "step": 3366 + }, + { + "epoch": 1.6637835166193007, + "grad_norm": 1.4580927261417298, + "learning_rate": 1.512790716792143e-06, + "loss": 0.2689869701862335, + "step": 3367 + }, + { + "epoch": 1.664277770913135, + "grad_norm": 1.2734577307213573, + "learning_rate": 1.5084726062929688e-06, + "loss": 0.22249455749988556, + "step": 3368 + }, + { + "epoch": 1.664772025206969, + "grad_norm": 1.444110335390912, + "learning_rate": 1.5041601646161585e-06, + "loss": 0.24586130678653717, + "step": 3369 + }, + { + "epoch": 1.6652662795008033, + "grad_norm": 1.3250583547488792, + "learning_rate": 1.499853394640629e-06, + "loss": 0.2549409568309784, + "step": 3370 + }, + { + "epoch": 1.6657605337946373, + "grad_norm": 1.4135792596464256, + "learning_rate": 1.4955522992415206e-06, + "loss": 0.2517774999141693, + "step": 3371 + }, + { + "epoch": 1.6662547880884715, + "grad_norm": 1.6132674993246225, + "learning_rate": 1.491256881290184e-06, + "loss": 0.2627662420272827, + "step": 3372 + }, + { + "epoch": 1.6667490423823057, + "grad_norm": 1.250156659660365, + "learning_rate": 1.4869671436541788e-06, + "loss": 0.25203272700309753, + "step": 3373 + }, + { + "epoch": 1.66724329667614, + "grad_norm": 1.3035778741812132, + "learning_rate": 1.482683089197271e-06, + "loss": 0.2206164300441742, + "step": 3374 + }, + { + "epoch": 1.667737550969974, + "grad_norm": 1.4034071560123977, + "learning_rate": 1.4784047207794383e-06, + "loss": 0.2551203966140747, + "step": 3375 + }, + { + "epoch": 1.668231805263808, + 
"grad_norm": 1.4247468939554981, + "learning_rate": 1.4741320412568505e-06, + "loss": 0.2592264711856842, + "step": 3376 + }, + { + "epoch": 1.6687260595576423, + "grad_norm": 1.3609833066581156, + "learning_rate": 1.4698650534818936e-06, + "loss": 0.25902658700942993, + "step": 3377 + }, + { + "epoch": 1.6692203138514765, + "grad_norm": 1.5283083080675575, + "learning_rate": 1.4656037603031491e-06, + "loss": 0.2685459852218628, + "step": 3378 + }, + { + "epoch": 1.6697145681453107, + "grad_norm": 1.2083368696295387, + "learning_rate": 1.4613481645653914e-06, + "loss": 0.21010839939117432, + "step": 3379 + }, + { + "epoch": 1.670208822439145, + "grad_norm": 1.3019618254178054, + "learning_rate": 1.4570982691095925e-06, + "loss": 0.23318082094192505, + "step": 3380 + }, + { + "epoch": 1.6707030767329791, + "grad_norm": 1.346937478273973, + "learning_rate": 1.4528540767729315e-06, + "loss": 0.25045326352119446, + "step": 3381 + }, + { + "epoch": 1.6711973310268133, + "grad_norm": 1.5157571774504706, + "learning_rate": 1.4486155903887623e-06, + "loss": 0.2436288446187973, + "step": 3382 + }, + { + "epoch": 1.6716915853206475, + "grad_norm": 1.2766580343897052, + "learning_rate": 1.444382812786641e-06, + "loss": 0.20454761385917664, + "step": 3383 + }, + { + "epoch": 1.6721858396144818, + "grad_norm": 1.3207693230256567, + "learning_rate": 1.4401557467923089e-06, + "loss": 0.24906963109970093, + "step": 3384 + }, + { + "epoch": 1.672680093908316, + "grad_norm": 1.3391460516330347, + "learning_rate": 1.435934395227695e-06, + "loss": 0.2552015483379364, + "step": 3385 + }, + { + "epoch": 1.6731743482021502, + "grad_norm": 1.3523733680416914, + "learning_rate": 1.4317187609109129e-06, + "loss": 0.2393915057182312, + "step": 3386 + }, + { + "epoch": 1.6736686024959841, + "grad_norm": 1.370539563215592, + "learning_rate": 1.4275088466562625e-06, + "loss": 0.2607477009296417, + "step": 3387 + }, + { + "epoch": 1.6741628567898184, + "grad_norm": 1.3296614147148798, + 
"learning_rate": 1.423304655274218e-06, + "loss": 0.23722632229328156, + "step": 3388 + }, + { + "epoch": 1.6746571110836526, + "grad_norm": 1.303256653854929, + "learning_rate": 1.4191061895714398e-06, + "loss": 0.2614964246749878, + "step": 3389 + }, + { + "epoch": 1.6751513653774868, + "grad_norm": 1.476448410559568, + "learning_rate": 1.4149134523507634e-06, + "loss": 0.2727823555469513, + "step": 3390 + }, + { + "epoch": 1.6756456196713208, + "grad_norm": 1.2739771939884463, + "learning_rate": 1.4107264464112003e-06, + "loss": 0.25176581740379333, + "step": 3391 + }, + { + "epoch": 1.676139873965155, + "grad_norm": 1.3087240197668597, + "learning_rate": 1.4065451745479352e-06, + "loss": 0.21339070796966553, + "step": 3392 + }, + { + "epoch": 1.6766341282589892, + "grad_norm": 1.449069234603101, + "learning_rate": 1.4023696395523267e-06, + "loss": 0.26540419459342957, + "step": 3393 + }, + { + "epoch": 1.6771283825528234, + "grad_norm": 1.3788929945945605, + "learning_rate": 1.3981998442119017e-06, + "loss": 0.2621360421180725, + "step": 3394 + }, + { + "epoch": 1.6776226368466576, + "grad_norm": 1.3149158272362809, + "learning_rate": 1.3940357913103576e-06, + "loss": 0.2578747570514679, + "step": 3395 + }, + { + "epoch": 1.6781168911404918, + "grad_norm": 1.3223117210430684, + "learning_rate": 1.3898774836275531e-06, + "loss": 0.26105010509490967, + "step": 3396 + }, + { + "epoch": 1.678611145434326, + "grad_norm": 1.277709690267506, + "learning_rate": 1.3857249239395143e-06, + "loss": 0.2221919298171997, + "step": 3397 + }, + { + "epoch": 1.6791053997281602, + "grad_norm": 1.3742911888899896, + "learning_rate": 1.3815781150184382e-06, + "loss": 0.2498932033777237, + "step": 3398 + }, + { + "epoch": 1.6795996540219944, + "grad_norm": 1.3631278461436225, + "learning_rate": 1.377437059632668e-06, + "loss": 0.29306796193122864, + "step": 3399 + }, + { + "epoch": 1.6800939083158286, + "grad_norm": 1.41106483401144, + "learning_rate": 1.3733017605467158e-06, + 
"loss": 0.23804892599582672, + "step": 3400 + }, + { + "epoch": 1.6805881626096628, + "grad_norm": 1.264388446305106, + "learning_rate": 1.3691722205212465e-06, + "loss": 0.18528425693511963, + "step": 3401 + }, + { + "epoch": 1.6810824169034968, + "grad_norm": 1.434400904695952, + "learning_rate": 1.365048442313085e-06, + "loss": 0.257534921169281, + "step": 3402 + }, + { + "epoch": 1.681576671197331, + "grad_norm": 1.390183210111369, + "learning_rate": 1.3609304286752034e-06, + "loss": 0.2519993782043457, + "step": 3403 + }, + { + "epoch": 1.6820709254911652, + "grad_norm": 1.5041703905686798, + "learning_rate": 1.3568181823567328e-06, + "loss": 0.27830445766448975, + "step": 3404 + }, + { + "epoch": 1.6825651797849994, + "grad_norm": 1.3496130761993563, + "learning_rate": 1.3527117061029438e-06, + "loss": 0.22532883286476135, + "step": 3405 + }, + { + "epoch": 1.6830594340788334, + "grad_norm": 1.3484913124474047, + "learning_rate": 1.3486110026552668e-06, + "loss": 0.23230011761188507, + "step": 3406 + }, + { + "epoch": 1.6835536883726676, + "grad_norm": 1.320791018685261, + "learning_rate": 1.3445160747512743e-06, + "loss": 0.24105653166770935, + "step": 3407 + }, + { + "epoch": 1.6840479426665018, + "grad_norm": 1.5077644423875391, + "learning_rate": 1.340426925124676e-06, + "loss": 0.2946394681930542, + "step": 3408 + }, + { + "epoch": 1.684542196960336, + "grad_norm": 1.403422513607122, + "learning_rate": 1.3363435565053319e-06, + "loss": 0.2682989239692688, + "step": 3409 + }, + { + "epoch": 1.6850364512541702, + "grad_norm": 1.3363195283881322, + "learning_rate": 1.332265971619241e-06, + "loss": 0.2219456285238266, + "step": 3410 + }, + { + "epoch": 1.6855307055480044, + "grad_norm": 1.2440577869208935, + "learning_rate": 1.3281941731885396e-06, + "loss": 0.22532151639461517, + "step": 3411 + }, + { + "epoch": 1.6860249598418386, + "grad_norm": 1.3951142777226702, + "learning_rate": 1.324128163931504e-06, + "loss": 0.24166807532310486, + "step": 3412 + }, 
+ { + "epoch": 1.6865192141356729, + "grad_norm": 1.8803758040895027, + "learning_rate": 1.3200679465625453e-06, + "loss": 0.25514671206474304, + "step": 3413 + }, + { + "epoch": 1.687013468429507, + "grad_norm": 1.4161288294493581, + "learning_rate": 1.3160135237922011e-06, + "loss": 0.263123482465744, + "step": 3414 + }, + { + "epoch": 1.6875077227233413, + "grad_norm": 1.3692510048196695, + "learning_rate": 1.3119648983271527e-06, + "loss": 0.23763976991176605, + "step": 3415 + }, + { + "epoch": 1.6880019770171755, + "grad_norm": 1.4514594135261416, + "learning_rate": 1.3079220728701991e-06, + "loss": 0.28645598888397217, + "step": 3416 + }, + { + "epoch": 1.6884962313110097, + "grad_norm": 1.3145652794970974, + "learning_rate": 1.303885050120275e-06, + "loss": 0.2269624024629593, + "step": 3417 + }, + { + "epoch": 1.6889904856048437, + "grad_norm": 1.2380861054344243, + "learning_rate": 1.2998538327724386e-06, + "loss": 0.23601466417312622, + "step": 3418 + }, + { + "epoch": 1.6894847398986779, + "grad_norm": 1.4253359182592056, + "learning_rate": 1.2958284235178743e-06, + "loss": 0.2246169149875641, + "step": 3419 + }, + { + "epoch": 1.689978994192512, + "grad_norm": 1.497489718348998, + "learning_rate": 1.2918088250438865e-06, + "loss": 0.26519715785980225, + "step": 3420 + }, + { + "epoch": 1.6904732484863463, + "grad_norm": 1.443915314302877, + "learning_rate": 1.2877950400339046e-06, + "loss": 0.2590267062187195, + "step": 3421 + }, + { + "epoch": 1.6909675027801803, + "grad_norm": 1.3941822393799335, + "learning_rate": 1.2837870711674672e-06, + "loss": 0.2535945773124695, + "step": 3422 + }, + { + "epoch": 1.6914617570740145, + "grad_norm": 1.3833358145204437, + "learning_rate": 1.279784921120244e-06, + "loss": 0.21907874941825867, + "step": 3423 + }, + { + "epoch": 1.6919560113678487, + "grad_norm": 1.3775789573220893, + "learning_rate": 1.2757885925640124e-06, + "loss": 0.23314553499221802, + "step": 3424 + }, + { + "epoch": 1.6924502656616829, + 
"grad_norm": 1.2335650824399806, + "learning_rate": 1.2717980881666615e-06, + "loss": 0.2288433313369751, + "step": 3425 + }, + { + "epoch": 1.692944519955517, + "grad_norm": 1.3218922014839134, + "learning_rate": 1.2678134105921924e-06, + "loss": 0.2285449206829071, + "step": 3426 + }, + { + "epoch": 1.6934387742493513, + "grad_norm": 1.4061495134031399, + "learning_rate": 1.2638345625007287e-06, + "loss": 0.2898653447628021, + "step": 3427 + }, + { + "epoch": 1.6939330285431855, + "grad_norm": 1.3140964049835469, + "learning_rate": 1.2598615465484831e-06, + "loss": 0.23574519157409668, + "step": 3428 + }, + { + "epoch": 1.6944272828370197, + "grad_norm": 1.8163323929078987, + "learning_rate": 1.2558943653877887e-06, + "loss": 0.23385417461395264, + "step": 3429 + }, + { + "epoch": 1.694921537130854, + "grad_norm": 1.4332956021988026, + "learning_rate": 1.2519330216670766e-06, + "loss": 0.2555482089519501, + "step": 3430 + }, + { + "epoch": 1.6954157914246881, + "grad_norm": 1.3005186125236943, + "learning_rate": 1.247977518030885e-06, + "loss": 0.22221535444259644, + "step": 3431 + }, + { + "epoch": 1.6959100457185223, + "grad_norm": 1.2645213358789251, + "learning_rate": 1.2440278571198516e-06, + "loss": 0.21753090620040894, + "step": 3432 + }, + { + "epoch": 1.6964043000123563, + "grad_norm": 1.3199124302473737, + "learning_rate": 1.240084041570716e-06, + "loss": 0.2352944314479828, + "step": 3433 + }, + { + "epoch": 1.6968985543061905, + "grad_norm": 1.3019158889354874, + "learning_rate": 1.2361460740163045e-06, + "loss": 0.22581814229488373, + "step": 3434 + }, + { + "epoch": 1.6973928086000247, + "grad_norm": 1.5051457985045136, + "learning_rate": 1.2322139570855596e-06, + "loss": 0.28703421354293823, + "step": 3435 + }, + { + "epoch": 1.697887062893859, + "grad_norm": 1.2466294121854475, + "learning_rate": 1.2282876934034972e-06, + "loss": 0.21528789401054382, + "step": 3436 + }, + { + "epoch": 1.698381317187693, + "grad_norm": 1.3714652202926056, + 
"learning_rate": 1.2243672855912393e-06, + "loss": 0.2675422430038452, + "step": 3437 + }, + { + "epoch": 1.6988755714815271, + "grad_norm": 1.4468798550658835, + "learning_rate": 1.2204527362659913e-06, + "loss": 0.26681527495384216, + "step": 3438 + }, + { + "epoch": 1.6993698257753613, + "grad_norm": 1.6692863707132455, + "learning_rate": 1.216544048041054e-06, + "loss": 0.2436470091342926, + "step": 3439 + }, + { + "epoch": 1.6998640800691955, + "grad_norm": 1.3471564011899657, + "learning_rate": 1.212641223525809e-06, + "loss": 0.25458425283432007, + "step": 3440 + }, + { + "epoch": 1.7003583343630297, + "grad_norm": 1.5076141037655715, + "learning_rate": 1.2087442653257286e-06, + "loss": 0.24890559911727905, + "step": 3441 + }, + { + "epoch": 1.700852588656864, + "grad_norm": 1.2935321774740525, + "learning_rate": 1.2048531760423642e-06, + "loss": 0.26031816005706787, + "step": 3442 + }, + { + "epoch": 1.7013468429506982, + "grad_norm": 1.2852726465517723, + "learning_rate": 1.200967958273349e-06, + "loss": 0.22184975445270538, + "step": 3443 + }, + { + "epoch": 1.7018410972445324, + "grad_norm": 1.4055101079653758, + "learning_rate": 1.1970886146124073e-06, + "loss": 0.2670953571796417, + "step": 3444 + }, + { + "epoch": 1.7023353515383666, + "grad_norm": 1.4509425159233789, + "learning_rate": 1.1932151476493247e-06, + "loss": 0.27950525283813477, + "step": 3445 + }, + { + "epoch": 1.7028296058322008, + "grad_norm": 1.177838308027136, + "learning_rate": 1.1893475599699766e-06, + "loss": 0.23257380723953247, + "step": 3446 + }, + { + "epoch": 1.703323860126035, + "grad_norm": 1.33833163811184, + "learning_rate": 1.1854858541563086e-06, + "loss": 0.2586575746536255, + "step": 3447 + }, + { + "epoch": 1.703818114419869, + "grad_norm": 1.4079485154063143, + "learning_rate": 1.1816300327863406e-06, + "loss": 0.2677457928657532, + "step": 3448 + }, + { + "epoch": 1.7043123687137032, + "grad_norm": 1.565618455451115, + "learning_rate": 1.1777800984341637e-06, + 
"loss": 0.29866284132003784, + "step": 3449 + }, + { + "epoch": 1.7048066230075374, + "grad_norm": 1.3858480302164131, + "learning_rate": 1.1739360536699397e-06, + "loss": 0.27279675006866455, + "step": 3450 + }, + { + "epoch": 1.7053008773013716, + "grad_norm": 1.4265301971817403, + "learning_rate": 1.1700979010598945e-06, + "loss": 0.25695672631263733, + "step": 3451 + }, + { + "epoch": 1.7057951315952056, + "grad_norm": 1.2548676263466874, + "learning_rate": 1.1662656431663278e-06, + "loss": 0.22578787803649902, + "step": 3452 + }, + { + "epoch": 1.7062893858890398, + "grad_norm": 1.2884557931863843, + "learning_rate": 1.1624392825476016e-06, + "loss": 0.1946491301059723, + "step": 3453 + }, + { + "epoch": 1.706783640182874, + "grad_norm": 1.7214838792794764, + "learning_rate": 1.158618821758134e-06, + "loss": 0.2099667191505432, + "step": 3454 + }, + { + "epoch": 1.7072778944767082, + "grad_norm": 1.3956932051100446, + "learning_rate": 1.1548042633484148e-06, + "loss": 0.22660428285598755, + "step": 3455 + }, + { + "epoch": 1.7077721487705424, + "grad_norm": 1.486801447510752, + "learning_rate": 1.1509956098649855e-06, + "loss": 0.27378255128860474, + "step": 3456 + }, + { + "epoch": 1.7082664030643766, + "grad_norm": 1.3265929348116055, + "learning_rate": 1.1471928638504504e-06, + "loss": 0.2209164947271347, + "step": 3457 + }, + { + "epoch": 1.7087606573582108, + "grad_norm": 1.4225246621575494, + "learning_rate": 1.1433960278434687e-06, + "loss": 0.24310322105884552, + "step": 3458 + }, + { + "epoch": 1.709254911652045, + "grad_norm": 1.408175906725771, + "learning_rate": 1.1396051043787526e-06, + "loss": 0.23209068179130554, + "step": 3459 + }, + { + "epoch": 1.7097491659458792, + "grad_norm": 1.3815567972930465, + "learning_rate": 1.1358200959870703e-06, + "loss": 0.2514454126358032, + "step": 3460 + }, + { + "epoch": 1.7102434202397134, + "grad_norm": 1.4417631759146625, + "learning_rate": 1.132041005195239e-06, + "loss": 0.2580721378326416, + "step": 
3461 + }, + { + "epoch": 1.7107376745335476, + "grad_norm": 1.3709268368925525, + "learning_rate": 1.1282678345261234e-06, + "loss": 0.26388949155807495, + "step": 3462 + }, + { + "epoch": 1.7112319288273818, + "grad_norm": 1.2783952905855267, + "learning_rate": 1.1245005864986402e-06, + "loss": 0.2194654643535614, + "step": 3463 + }, + { + "epoch": 1.7117261831212158, + "grad_norm": 1.2633121407835717, + "learning_rate": 1.1207392636277502e-06, + "loss": 0.2048814296722412, + "step": 3464 + }, + { + "epoch": 1.71222043741505, + "grad_norm": 1.33926020269927, + "learning_rate": 1.1169838684244584e-06, + "loss": 0.24165832996368408, + "step": 3465 + }, + { + "epoch": 1.7127146917088842, + "grad_norm": 1.3906329052137327, + "learning_rate": 1.1132344033958132e-06, + "loss": 0.2484482377767563, + "step": 3466 + }, + { + "epoch": 1.7132089460027184, + "grad_norm": 1.4564028814853938, + "learning_rate": 1.1094908710449048e-06, + "loss": 0.2406741827726364, + "step": 3467 + }, + { + "epoch": 1.7137032002965524, + "grad_norm": 1.4018531611252434, + "learning_rate": 1.1057532738708588e-06, + "loss": 0.2417721152305603, + "step": 3468 + }, + { + "epoch": 1.7141974545903866, + "grad_norm": 1.4560734194910743, + "learning_rate": 1.1020216143688446e-06, + "loss": 0.26304543018341064, + "step": 3469 + }, + { + "epoch": 1.7146917088842208, + "grad_norm": 1.476031518585943, + "learning_rate": 1.098295895030066e-06, + "loss": 0.30013689398765564, + "step": 3470 + }, + { + "epoch": 1.715185963178055, + "grad_norm": 1.3175345714713855, + "learning_rate": 1.0945761183417569e-06, + "loss": 0.21451817452907562, + "step": 3471 + }, + { + "epoch": 1.7156802174718893, + "grad_norm": 1.3300365419760627, + "learning_rate": 1.0908622867871854e-06, + "loss": 0.235377699136734, + "step": 3472 + }, + { + "epoch": 1.7161744717657235, + "grad_norm": 1.2866674867130445, + "learning_rate": 1.0871544028456594e-06, + "loss": 0.23560425639152527, + "step": 3473 + }, + { + "epoch": 1.7166687260595577, 
+ "grad_norm": 1.3385949926310057, + "learning_rate": 1.083452468992503e-06, + "loss": 0.2431229054927826, + "step": 3474 + }, + { + "epoch": 1.7171629803533919, + "grad_norm": 1.2089508133597444, + "learning_rate": 1.0797564876990762e-06, + "loss": 0.211553692817688, + "step": 3475 + }, + { + "epoch": 1.717657234647226, + "grad_norm": 1.3533177183735723, + "learning_rate": 1.0760664614327643e-06, + "loss": 0.23565953969955444, + "step": 3476 + }, + { + "epoch": 1.7181514889410603, + "grad_norm": 1.328162178864468, + "learning_rate": 1.0723823926569744e-06, + "loss": 0.2052966058254242, + "step": 3477 + }, + { + "epoch": 1.7186457432348945, + "grad_norm": 1.3067945675468369, + "learning_rate": 1.06870428383114e-06, + "loss": 0.24831204116344452, + "step": 3478 + }, + { + "epoch": 1.7191399975287285, + "grad_norm": 1.273169118321956, + "learning_rate": 1.0650321374107142e-06, + "loss": 0.24706462025642395, + "step": 3479 + }, + { + "epoch": 1.7196342518225627, + "grad_norm": 1.4211234189057285, + "learning_rate": 1.0613659558471644e-06, + "loss": 0.20845818519592285, + "step": 3480 + }, + { + "epoch": 1.720128506116397, + "grad_norm": 1.2323642708024432, + "learning_rate": 1.0577057415879887e-06, + "loss": 0.21599797904491425, + "step": 3481 + }, + { + "epoch": 1.720622760410231, + "grad_norm": 1.4618240857831881, + "learning_rate": 1.054051497076689e-06, + "loss": 0.2381049394607544, + "step": 3482 + }, + { + "epoch": 1.721117014704065, + "grad_norm": 1.3155008449637104, + "learning_rate": 1.0504032247527874e-06, + "loss": 0.22402817010879517, + "step": 3483 + }, + { + "epoch": 1.7216112689978993, + "grad_norm": 1.5409902580545625, + "learning_rate": 1.0467609270518186e-06, + "loss": 0.24406251311302185, + "step": 3484 + }, + { + "epoch": 1.7221055232917335, + "grad_norm": 1.339222294791023, + "learning_rate": 1.0431246064053291e-06, + "loss": 0.24388936161994934, + "step": 3485 + }, + { + "epoch": 1.7225997775855677, + "grad_norm": 1.3265412686691833, + 
"learning_rate": 1.0394942652408735e-06, + "loss": 0.26131671667099, + "step": 3486 + }, + { + "epoch": 1.723094031879402, + "grad_norm": 1.3718768259485188, + "learning_rate": 1.0358699059820188e-06, + "loss": 0.247392475605011, + "step": 3487 + }, + { + "epoch": 1.7235882861732361, + "grad_norm": 1.335920284358623, + "learning_rate": 1.0322515310483316e-06, + "loss": 0.22713768482208252, + "step": 3488 + }, + { + "epoch": 1.7240825404670703, + "grad_norm": 1.3821197244420464, + "learning_rate": 1.0286391428553854e-06, + "loss": 0.2544357180595398, + "step": 3489 + }, + { + "epoch": 1.7245767947609045, + "grad_norm": 1.260460911336476, + "learning_rate": 1.0250327438147678e-06, + "loss": 0.23186656832695007, + "step": 3490 + }, + { + "epoch": 1.7250710490547387, + "grad_norm": 1.1804266448755296, + "learning_rate": 1.0214323363340506e-06, + "loss": 0.20387035608291626, + "step": 3491 + }, + { + "epoch": 1.725565303348573, + "grad_norm": 1.4265943405789598, + "learning_rate": 1.017837922816819e-06, + "loss": 0.25391846895217896, + "step": 3492 + }, + { + "epoch": 1.7260595576424071, + "grad_norm": 1.2603447890118837, + "learning_rate": 1.014249505662649e-06, + "loss": 0.23214812576770782, + "step": 3493 + }, + { + "epoch": 1.7265538119362414, + "grad_norm": 1.5899981641866812, + "learning_rate": 1.0106670872671187e-06, + "loss": 0.31888365745544434, + "step": 3494 + }, + { + "epoch": 1.7270480662300753, + "grad_norm": 1.2907611357867346, + "learning_rate": 1.0070906700217998e-06, + "loss": 0.23372362554073334, + "step": 3495 + }, + { + "epoch": 1.7275423205239095, + "grad_norm": 1.2449017093435057, + "learning_rate": 1.0035202563142577e-06, + "loss": 0.20082907378673553, + "step": 3496 + }, + { + "epoch": 1.7280365748177438, + "grad_norm": 1.3171397747083256, + "learning_rate": 9.99955848528046e-07, + "loss": 0.23895825445652008, + "step": 3497 + }, + { + "epoch": 1.728530829111578, + "grad_norm": 1.4142591511055072, + "learning_rate": 9.963974490427153e-07, + 
"loss": 0.30089694261550903, + "step": 3498 + }, + { + "epoch": 1.729025083405412, + "grad_norm": 1.4071492496267155, + "learning_rate": 9.928450602338046e-07, + "loss": 0.28134891390800476, + "step": 3499 + }, + { + "epoch": 1.7295193376992462, + "grad_norm": 1.239666390023503, + "learning_rate": 9.892986844728325e-07, + "loss": 0.1947125792503357, + "step": 3500 + }, + { + "epoch": 1.7300135919930804, + "grad_norm": 1.2560350647671819, + "learning_rate": 9.857583241273116e-07, + "loss": 0.252549409866333, + "step": 3501 + }, + { + "epoch": 1.7305078462869146, + "grad_norm": 1.8080125735095465, + "learning_rate": 9.82223981560736e-07, + "loss": 0.28061211109161377, + "step": 3502 + }, + { + "epoch": 1.7310021005807488, + "grad_norm": 1.3465400182463805, + "learning_rate": 9.786956591325813e-07, + "loss": 0.2492327094078064, + "step": 3503 + }, + { + "epoch": 1.731496354874583, + "grad_norm": 1.3114105920039891, + "learning_rate": 9.75173359198307e-07, + "loss": 0.20470373332500458, + "step": 3504 + }, + { + "epoch": 1.7319906091684172, + "grad_norm": 1.4582343704980485, + "learning_rate": 9.716570841093476e-07, + "loss": 0.24190351366996765, + "step": 3505 + }, + { + "epoch": 1.7324848634622514, + "grad_norm": 1.3916465638756335, + "learning_rate": 9.681468362131209e-07, + "loss": 0.28784725069999695, + "step": 3506 + }, + { + "epoch": 1.7329791177560856, + "grad_norm": 1.4872057430892556, + "learning_rate": 9.646426178530176e-07, + "loss": 0.2676560878753662, + "step": 3507 + }, + { + "epoch": 1.7334733720499198, + "grad_norm": 1.4118374661566944, + "learning_rate": 9.611444313684027e-07, + "loss": 0.2493928223848343, + "step": 3508 + }, + { + "epoch": 1.733967626343754, + "grad_norm": 1.272854491876895, + "learning_rate": 9.57652279094613e-07, + "loss": 0.23272472620010376, + "step": 3509 + }, + { + "epoch": 1.734461880637588, + "grad_norm": 1.3295460481124186, + "learning_rate": 9.541661633629662e-07, + "loss": 0.23245804011821747, + "step": 3510 + }, + { + 
"epoch": 1.7349561349314222, + "grad_norm": 1.318916212284511, + "learning_rate": 9.506860865007373e-07, + "loss": 0.22367024421691895, + "step": 3511 + }, + { + "epoch": 1.7354503892252564, + "grad_norm": 1.312738075120818, + "learning_rate": 9.472120508311788e-07, + "loss": 0.22332677245140076, + "step": 3512 + }, + { + "epoch": 1.7359446435190906, + "grad_norm": 1.3669711817276102, + "learning_rate": 9.437440586735081e-07, + "loss": 0.28051453828811646, + "step": 3513 + }, + { + "epoch": 1.7364388978129246, + "grad_norm": 1.5089189064457602, + "learning_rate": 9.402821123429017e-07, + "loss": 0.24815741181373596, + "step": 3514 + }, + { + "epoch": 1.7369331521067588, + "grad_norm": 1.338757796188803, + "learning_rate": 9.368262141505114e-07, + "loss": 0.24077603220939636, + "step": 3515 + }, + { + "epoch": 1.737427406400593, + "grad_norm": 1.419717776508751, + "learning_rate": 9.333763664034457e-07, + "loss": 0.24596062302589417, + "step": 3516 + }, + { + "epoch": 1.7379216606944272, + "grad_norm": 1.3347588363810814, + "learning_rate": 9.299325714047702e-07, + "loss": 0.22939634323120117, + "step": 3517 + }, + { + "epoch": 1.7384159149882614, + "grad_norm": 1.446015117761441, + "learning_rate": 9.264948314535116e-07, + "loss": 0.24870653450489044, + "step": 3518 + }, + { + "epoch": 1.7389101692820956, + "grad_norm": 1.2985600743859553, + "learning_rate": 9.23063148844664e-07, + "loss": 0.24589623510837555, + "step": 3519 + }, + { + "epoch": 1.7394044235759298, + "grad_norm": 1.3138002527909343, + "learning_rate": 9.196375258691615e-07, + "loss": 0.24228474497795105, + "step": 3520 + }, + { + "epoch": 1.739898677869764, + "grad_norm": 1.274631487561465, + "learning_rate": 9.162179648139047e-07, + "loss": 0.24371150135993958, + "step": 3521 + }, + { + "epoch": 1.7403929321635982, + "grad_norm": 1.279720023026326, + "learning_rate": 9.128044679617432e-07, + "loss": 0.24775750935077667, + "step": 3522 + }, + { + "epoch": 1.7408871864574325, + "grad_norm": 
1.5257492514284694, + "learning_rate": 9.093970375914784e-07, + "loss": 0.2893243432044983, + "step": 3523 + }, + { + "epoch": 1.7413814407512667, + "grad_norm": 1.300861064044251, + "learning_rate": 9.059956759778632e-07, + "loss": 0.24014830589294434, + "step": 3524 + }, + { + "epoch": 1.7418756950451009, + "grad_norm": 1.399249837900177, + "learning_rate": 9.026003853915977e-07, + "loss": 0.21439003944396973, + "step": 3525 + }, + { + "epoch": 1.7423699493389349, + "grad_norm": 1.3253623378225632, + "learning_rate": 8.992111680993265e-07, + "loss": 0.23376847803592682, + "step": 3526 + }, + { + "epoch": 1.742864203632769, + "grad_norm": 1.3914877634645069, + "learning_rate": 8.958280263636487e-07, + "loss": 0.244795560836792, + "step": 3527 + }, + { + "epoch": 1.7433584579266033, + "grad_norm": 1.3847661327530765, + "learning_rate": 8.924509624430955e-07, + "loss": 0.2513751685619354, + "step": 3528 + }, + { + "epoch": 1.7438527122204373, + "grad_norm": 1.3808839230401615, + "learning_rate": 8.890799785921478e-07, + "loss": 0.2118893414735794, + "step": 3529 + }, + { + "epoch": 1.7443469665142715, + "grad_norm": 1.4606627623109902, + "learning_rate": 8.857150770612288e-07, + "loss": 0.2834109365940094, + "step": 3530 + }, + { + "epoch": 1.7448412208081057, + "grad_norm": 1.3959930901293698, + "learning_rate": 8.823562600966962e-07, + "loss": 0.2546151876449585, + "step": 3531 + }, + { + "epoch": 1.7453354751019399, + "grad_norm": 1.3410984246991777, + "learning_rate": 8.790035299408494e-07, + "loss": 0.2654607594013214, + "step": 3532 + }, + { + "epoch": 1.745829729395774, + "grad_norm": 1.4773453802832905, + "learning_rate": 8.756568888319239e-07, + "loss": 0.2720295786857605, + "step": 3533 + }, + { + "epoch": 1.7463239836896083, + "grad_norm": 1.3341271298777078, + "learning_rate": 8.723163390040856e-07, + "loss": 0.22259725630283356, + "step": 3534 + }, + { + "epoch": 1.7468182379834425, + "grad_norm": 1.3952830917524783, + "learning_rate": 
8.68981882687443e-07, + "loss": 0.22918277978897095, + "step": 3535 + }, + { + "epoch": 1.7473124922772767, + "grad_norm": 1.4553860122555766, + "learning_rate": 8.656535221080297e-07, + "loss": 0.24396009743213654, + "step": 3536 + }, + { + "epoch": 1.747806746571111, + "grad_norm": 1.4530449395488945, + "learning_rate": 8.623312594878097e-07, + "loss": 0.2370900958776474, + "step": 3537 + }, + { + "epoch": 1.748301000864945, + "grad_norm": 1.4353409191789361, + "learning_rate": 8.590150970446798e-07, + "loss": 0.2785671055316925, + "step": 3538 + }, + { + "epoch": 1.7487952551587793, + "grad_norm": 1.3531168663907844, + "learning_rate": 8.557050369924624e-07, + "loss": 0.29365241527557373, + "step": 3539 + }, + { + "epoch": 1.7492895094526135, + "grad_norm": 1.3579124483240532, + "learning_rate": 8.524010815409068e-07, + "loss": 0.24052876234054565, + "step": 3540 + }, + { + "epoch": 1.7497837637464475, + "grad_norm": 1.5096531715278536, + "learning_rate": 8.49103232895685e-07, + "loss": 0.23938694596290588, + "step": 3541 + }, + { + "epoch": 1.7502780180402817, + "grad_norm": 1.2842245856075563, + "learning_rate": 8.458114932583961e-07, + "loss": 0.2244144231081009, + "step": 3542 + }, + { + "epoch": 1.750772272334116, + "grad_norm": 1.4659940645429403, + "learning_rate": 8.425258648265544e-07, + "loss": 0.25028878450393677, + "step": 3543 + }, + { + "epoch": 1.7512665266279501, + "grad_norm": 1.414718407414415, + "learning_rate": 8.39246349793602e-07, + "loss": 0.23135274648666382, + "step": 3544 + }, + { + "epoch": 1.751760780921784, + "grad_norm": 1.3004631081596045, + "learning_rate": 8.359729503488967e-07, + "loss": 0.23874548077583313, + "step": 3545 + }, + { + "epoch": 1.7522550352156183, + "grad_norm": 1.4912661633646227, + "learning_rate": 8.327056686777102e-07, + "loss": 0.2780659794807434, + "step": 3546 + }, + { + "epoch": 1.7527492895094525, + "grad_norm": 1.3424848463452685, + "learning_rate": 8.294445069612356e-07, + "loss": 0.213335320353508, + 
"step": 3547 + }, + { + "epoch": 1.7532435438032867, + "grad_norm": 1.3764395925344186, + "learning_rate": 8.261894673765757e-07, + "loss": 0.23284730315208435, + "step": 3548 + }, + { + "epoch": 1.753737798097121, + "grad_norm": 1.4152912967440003, + "learning_rate": 8.229405520967504e-07, + "loss": 0.25429633259773254, + "step": 3549 + }, + { + "epoch": 1.7542320523909551, + "grad_norm": 1.42166486412748, + "learning_rate": 8.196977632906877e-07, + "loss": 0.2519379258155823, + "step": 3550 + }, + { + "epoch": 1.7547263066847893, + "grad_norm": 1.3397514660513317, + "learning_rate": 8.164611031232283e-07, + "loss": 0.2510948181152344, + "step": 3551 + }, + { + "epoch": 1.7552205609786236, + "grad_norm": 1.4391737307664527, + "learning_rate": 8.132305737551193e-07, + "loss": 0.27415433526039124, + "step": 3552 + }, + { + "epoch": 1.7557148152724578, + "grad_norm": 1.4503824956137814, + "learning_rate": 8.100061773430179e-07, + "loss": 0.26723912358283997, + "step": 3553 + }, + { + "epoch": 1.756209069566292, + "grad_norm": 1.3305646078685684, + "learning_rate": 8.067879160394821e-07, + "loss": 0.2710701823234558, + "step": 3554 + }, + { + "epoch": 1.7567033238601262, + "grad_norm": 1.2981752509304552, + "learning_rate": 8.035757919929765e-07, + "loss": 0.23247234523296356, + "step": 3555 + }, + { + "epoch": 1.7571975781539602, + "grad_norm": 1.3788336069912301, + "learning_rate": 8.003698073478749e-07, + "loss": 0.2514559328556061, + "step": 3556 + }, + { + "epoch": 1.7576918324477944, + "grad_norm": 1.2669691261364102, + "learning_rate": 7.971699642444419e-07, + "loss": 0.23549199104309082, + "step": 3557 + }, + { + "epoch": 1.7581860867416286, + "grad_norm": 1.326325870924157, + "learning_rate": 7.939762648188476e-07, + "loss": 0.24511446058750153, + "step": 3558 + }, + { + "epoch": 1.7586803410354628, + "grad_norm": 1.244030857989509, + "learning_rate": 7.907887112031609e-07, + "loss": 0.18705075979232788, + "step": 3559 + }, + { + "epoch": 1.7591745953292968, 
+ "grad_norm": 1.3163815425830492, + "learning_rate": 7.876073055253474e-07, + "loss": 0.24297048151493073, + "step": 3560 + }, + { + "epoch": 1.759668849623131, + "grad_norm": 1.3886968971610452, + "learning_rate": 7.844320499092683e-07, + "loss": 0.239119753241539, + "step": 3561 + }, + { + "epoch": 1.7601631039169652, + "grad_norm": 1.3716161630664097, + "learning_rate": 7.81262946474679e-07, + "loss": 0.2430122196674347, + "step": 3562 + }, + { + "epoch": 1.7606573582107994, + "grad_norm": 1.5018987096099226, + "learning_rate": 7.78099997337225e-07, + "loss": 0.2785049378871918, + "step": 3563 + }, + { + "epoch": 1.7611516125046336, + "grad_norm": 1.324774124882076, + "learning_rate": 7.749432046084471e-07, + "loss": 0.2451494038105011, + "step": 3564 + }, + { + "epoch": 1.7616458667984678, + "grad_norm": 1.2759037312949375, + "learning_rate": 7.717925703957785e-07, + "loss": 0.20071648061275482, + "step": 3565 + }, + { + "epoch": 1.762140121092302, + "grad_norm": 1.265455917769001, + "learning_rate": 7.686480968025333e-07, + "loss": 0.22308245301246643, + "step": 3566 + }, + { + "epoch": 1.7626343753861362, + "grad_norm": 1.4753453520092665, + "learning_rate": 7.655097859279192e-07, + "loss": 0.26082009077072144, + "step": 3567 + }, + { + "epoch": 1.7631286296799704, + "grad_norm": 1.2035646972809244, + "learning_rate": 7.623776398670268e-07, + "loss": 0.21026611328125, + "step": 3568 + }, + { + "epoch": 1.7636228839738046, + "grad_norm": 1.3616311603644673, + "learning_rate": 7.592516607108324e-07, + "loss": 0.23878465592861176, + "step": 3569 + }, + { + "epoch": 1.7641171382676388, + "grad_norm": 1.4512524044419246, + "learning_rate": 7.561318505461956e-07, + "loss": 0.30288150906562805, + "step": 3570 + }, + { + "epoch": 1.764611392561473, + "grad_norm": 1.3464088406966324, + "learning_rate": 7.530182114558582e-07, + "loss": 0.25749915838241577, + "step": 3571 + }, + { + "epoch": 1.765105646855307, + "grad_norm": 1.4850779133681176, + "learning_rate": 
7.499107455184351e-07, + "loss": 0.23799163103103638, + "step": 3572 + }, + { + "epoch": 1.7655999011491412, + "grad_norm": 1.2970926183891958, + "learning_rate": 7.46809454808436e-07, + "loss": 0.2626670002937317, + "step": 3573 + }, + { + "epoch": 1.7660941554429754, + "grad_norm": 1.4394447645143165, + "learning_rate": 7.437143413962299e-07, + "loss": 0.23273026943206787, + "step": 3574 + }, + { + "epoch": 1.7665884097368096, + "grad_norm": 1.329151714167698, + "learning_rate": 7.406254073480735e-07, + "loss": 0.22592151165008545, + "step": 3575 + }, + { + "epoch": 1.7670826640306436, + "grad_norm": 1.4000212660765223, + "learning_rate": 7.375426547260944e-07, + "loss": 0.2594859004020691, + "step": 3576 + }, + { + "epoch": 1.7675769183244778, + "grad_norm": 1.2114788921542652, + "learning_rate": 7.344660855882946e-07, + "loss": 0.2161571979522705, + "step": 3577 + }, + { + "epoch": 1.768071172618312, + "grad_norm": 1.2669666342048183, + "learning_rate": 7.313957019885487e-07, + "loss": 0.23052990436553955, + "step": 3578 + }, + { + "epoch": 1.7685654269121462, + "grad_norm": 1.2921856609362714, + "learning_rate": 7.283315059766005e-07, + "loss": 0.2309163510799408, + "step": 3579 + }, + { + "epoch": 1.7690596812059804, + "grad_norm": 1.3800150012724666, + "learning_rate": 7.252734995980604e-07, + "loss": 0.24543863534927368, + "step": 3580 + }, + { + "epoch": 1.7695539354998147, + "grad_norm": 1.315509052214176, + "learning_rate": 7.22221684894413e-07, + "loss": 0.27616050839424133, + "step": 3581 + }, + { + "epoch": 1.7700481897936489, + "grad_norm": 1.5849292816622715, + "learning_rate": 7.191760639030077e-07, + "loss": 0.2247719019651413, + "step": 3582 + }, + { + "epoch": 1.770542444087483, + "grad_norm": 1.3600242028973613, + "learning_rate": 7.161366386570545e-07, + "loss": 0.28721702098846436, + "step": 3583 + }, + { + "epoch": 1.7710366983813173, + "grad_norm": 1.3444976293289765, + "learning_rate": 7.131034111856294e-07, + "loss": 0.24191290140151978, 
+ "step": 3584 + }, + { + "epoch": 1.7715309526751515, + "grad_norm": 1.3549546462173616, + "learning_rate": 7.100763835136748e-07, + "loss": 0.24049970507621765, + "step": 3585 + }, + { + "epoch": 1.7720252069689857, + "grad_norm": 1.4855378384649431, + "learning_rate": 7.070555576619887e-07, + "loss": 0.255404531955719, + "step": 3586 + }, + { + "epoch": 1.7725194612628197, + "grad_norm": 1.3672964019576628, + "learning_rate": 7.040409356472333e-07, + "loss": 0.23041129112243652, + "step": 3587 + }, + { + "epoch": 1.7730137155566539, + "grad_norm": 1.3790812567511086, + "learning_rate": 7.010325194819278e-07, + "loss": 0.2589847147464752, + "step": 3588 + }, + { + "epoch": 1.773507969850488, + "grad_norm": 1.4114272066031652, + "learning_rate": 6.980303111744424e-07, + "loss": 0.2604563236236572, + "step": 3589 + }, + { + "epoch": 1.7740022241443223, + "grad_norm": 1.3786249354000182, + "learning_rate": 6.950343127290138e-07, + "loss": 0.26831385493278503, + "step": 3590 + }, + { + "epoch": 1.7744964784381563, + "grad_norm": 1.3398044201914234, + "learning_rate": 6.920445261457276e-07, + "loss": 0.20475032925605774, + "step": 3591 + }, + { + "epoch": 1.7749907327319905, + "grad_norm": 1.669693479578031, + "learning_rate": 6.890609534205206e-07, + "loss": 0.32378682494163513, + "step": 3592 + }, + { + "epoch": 1.7754849870258247, + "grad_norm": 1.4433175991642826, + "learning_rate": 6.86083596545184e-07, + "loss": 0.2526070177555084, + "step": 3593 + }, + { + "epoch": 1.775979241319659, + "grad_norm": 1.3738645357999373, + "learning_rate": 6.831124575073578e-07, + "loss": 0.2467537820339203, + "step": 3594 + }, + { + "epoch": 1.776473495613493, + "grad_norm": 1.4660741149631984, + "learning_rate": 6.801475382905332e-07, + "loss": 0.2857215404510498, + "step": 3595 + }, + { + "epoch": 1.7769677499073273, + "grad_norm": 1.4443968381596262, + "learning_rate": 6.771888408740479e-07, + "loss": 0.23615087568759918, + "step": 3596 + }, + { + "epoch": 1.7774620042011615, 
+ "grad_norm": 1.451390021672748, + "learning_rate": 6.742363672330854e-07, + "loss": 0.2613365054130554, + "step": 3597 + }, + { + "epoch": 1.7779562584949957, + "grad_norm": 1.465141872886975, + "learning_rate": 6.712901193386756e-07, + "loss": 0.2558417320251465, + "step": 3598 + }, + { + "epoch": 1.77845051278883, + "grad_norm": 1.4467371641088191, + "learning_rate": 6.683500991576919e-07, + "loss": 0.2683117091655731, + "step": 3599 + }, + { + "epoch": 1.7789447670826641, + "grad_norm": 1.4625204738144366, + "learning_rate": 6.654163086528487e-07, + "loss": 0.2546064555644989, + "step": 3600 + }, + { + "epoch": 1.7794390213764983, + "grad_norm": 1.5872307428555623, + "learning_rate": 6.624887497827004e-07, + "loss": 0.2683906555175781, + "step": 3601 + }, + { + "epoch": 1.7799332756703325, + "grad_norm": 1.363900663564542, + "learning_rate": 6.595674245016492e-07, + "loss": 0.23260846734046936, + "step": 3602 + }, + { + "epoch": 1.7804275299641665, + "grad_norm": 1.3840728964244504, + "learning_rate": 6.566523347599252e-07, + "loss": 0.22884608805179596, + "step": 3603 + }, + { + "epoch": 1.7809217842580007, + "grad_norm": 1.3583647776279095, + "learning_rate": 6.537434825036027e-07, + "loss": 0.24236485362052917, + "step": 3604 + }, + { + "epoch": 1.781416038551835, + "grad_norm": 1.4869775379128283, + "learning_rate": 6.508408696745893e-07, + "loss": 0.29543957114219666, + "step": 3605 + }, + { + "epoch": 1.781910292845669, + "grad_norm": 1.3626399619539873, + "learning_rate": 6.479444982106276e-07, + "loss": 0.24011383950710297, + "step": 3606 + }, + { + "epoch": 1.7824045471395031, + "grad_norm": 1.3135116984072812, + "learning_rate": 6.450543700452949e-07, + "loss": 0.248407244682312, + "step": 3607 + }, + { + "epoch": 1.7828988014333373, + "grad_norm": 1.4089475770026854, + "learning_rate": 6.421704871080004e-07, + "loss": 0.2405746728181839, + "step": 3608 + }, + { + "epoch": 1.7833930557271716, + "grad_norm": 1.2522903384339197, + "learning_rate": 
6.392928513239804e-07, + "loss": 0.24601790308952332, + "step": 3609 + }, + { + "epoch": 1.7838873100210058, + "grad_norm": 1.2436557177887422, + "learning_rate": 6.36421464614303e-07, + "loss": 0.20030242204666138, + "step": 3610 + }, + { + "epoch": 1.78438156431484, + "grad_norm": 1.3296983724782687, + "learning_rate": 6.335563288958691e-07, + "loss": 0.23858311772346497, + "step": 3611 + }, + { + "epoch": 1.7848758186086742, + "grad_norm": 1.4392435044249465, + "learning_rate": 6.306974460813986e-07, + "loss": 0.2330242097377777, + "step": 3612 + }, + { + "epoch": 1.7853700729025084, + "grad_norm": 1.445863340067418, + "learning_rate": 6.278448180794416e-07, + "loss": 0.25513261556625366, + "step": 3613 + }, + { + "epoch": 1.7858643271963426, + "grad_norm": 1.3248647587522469, + "learning_rate": 6.249984467943737e-07, + "loss": 0.2298405021429062, + "step": 3614 + }, + { + "epoch": 1.7863585814901768, + "grad_norm": 1.3090685428520892, + "learning_rate": 6.221583341263893e-07, + "loss": 0.22120623290538788, + "step": 3615 + }, + { + "epoch": 1.786852835784011, + "grad_norm": 1.3392765156774626, + "learning_rate": 6.193244819715072e-07, + "loss": 0.26976969838142395, + "step": 3616 + }, + { + "epoch": 1.7873470900778452, + "grad_norm": 1.3657180436845977, + "learning_rate": 6.164968922215697e-07, + "loss": 0.24354586005210876, + "step": 3617 + }, + { + "epoch": 1.7878413443716792, + "grad_norm": 1.4254233164600292, + "learning_rate": 6.136755667642302e-07, + "loss": 0.2849498689174652, + "step": 3618 + }, + { + "epoch": 1.7883355986655134, + "grad_norm": 1.2708453781613391, + "learning_rate": 6.10860507482971e-07, + "loss": 0.2431584596633911, + "step": 3619 + }, + { + "epoch": 1.7888298529593476, + "grad_norm": 1.5031154285158648, + "learning_rate": 6.080517162570809e-07, + "loss": 0.2384781688451767, + "step": 3620 + }, + { + "epoch": 1.7893241072531818, + "grad_norm": 1.45686854578023, + "learning_rate": 6.052491949616712e-07, + "loss": 0.23782339692115784, + 
"step": 3621 + }, + { + "epoch": 1.7898183615470158, + "grad_norm": 1.342733882676876, + "learning_rate": 6.024529454676631e-07, + "loss": 0.23293447494506836, + "step": 3622 + }, + { + "epoch": 1.79031261584085, + "grad_norm": 1.2930495337650696, + "learning_rate": 5.996629696417955e-07, + "loss": 0.21202662587165833, + "step": 3623 + }, + { + "epoch": 1.7908068701346842, + "grad_norm": 1.5889243123202152, + "learning_rate": 5.968792693466141e-07, + "loss": 0.27971768379211426, + "step": 3624 + }, + { + "epoch": 1.7913011244285184, + "grad_norm": 1.441999540970622, + "learning_rate": 5.94101846440478e-07, + "loss": 0.2433638721704483, + "step": 3625 + }, + { + "epoch": 1.7917953787223526, + "grad_norm": 1.3682285780053611, + "learning_rate": 5.91330702777555e-07, + "loss": 0.21812602877616882, + "step": 3626 + }, + { + "epoch": 1.7922896330161868, + "grad_norm": 1.924541384200403, + "learning_rate": 5.88565840207822e-07, + "loss": 0.2135028839111328, + "step": 3627 + }, + { + "epoch": 1.792783887310021, + "grad_norm": 1.3226125497456243, + "learning_rate": 5.858072605770626e-07, + "loss": 0.23919226229190826, + "step": 3628 + }, + { + "epoch": 1.7932781416038552, + "grad_norm": 1.3008122554752455, + "learning_rate": 5.830549657268614e-07, + "loss": 0.2495008111000061, + "step": 3629 + }, + { + "epoch": 1.7937723958976894, + "grad_norm": 1.4679589100669386, + "learning_rate": 5.80308957494613e-07, + "loss": 0.2531805634498596, + "step": 3630 + }, + { + "epoch": 1.7942666501915236, + "grad_norm": 1.2654762717037664, + "learning_rate": 5.775692377135156e-07, + "loss": 0.22644619643688202, + "step": 3631 + }, + { + "epoch": 1.7947609044853579, + "grad_norm": 1.2567004368149646, + "learning_rate": 5.748358082125638e-07, + "loss": 0.2264411598443985, + "step": 3632 + }, + { + "epoch": 1.7952551587791918, + "grad_norm": 1.3206987713043599, + "learning_rate": 5.721086708165568e-07, + "loss": 0.2663921117782593, + "step": 3633 + }, + { + "epoch": 1.795749413073026, + 
"grad_norm": 1.35703763331278, + "learning_rate": 5.693878273460951e-07, + "loss": 0.2398051619529724, + "step": 3634 + }, + { + "epoch": 1.7962436673668603, + "grad_norm": 1.4184943078470147, + "learning_rate": 5.6667327961757e-07, + "loss": 0.28781580924987793, + "step": 3635 + }, + { + "epoch": 1.7967379216606945, + "grad_norm": 2.1761368991988084, + "learning_rate": 5.639650294431787e-07, + "loss": 0.2232055813074112, + "step": 3636 + }, + { + "epoch": 1.7972321759545284, + "grad_norm": 1.402577073030083, + "learning_rate": 5.612630786309103e-07, + "loss": 0.23214340209960938, + "step": 3637 + }, + { + "epoch": 1.7977264302483627, + "grad_norm": 1.2714718799747338, + "learning_rate": 5.585674289845467e-07, + "loss": 0.21598659455776215, + "step": 3638 + }, + { + "epoch": 1.7982206845421969, + "grad_norm": 1.351029180109128, + "learning_rate": 5.558780823036658e-07, + "loss": 0.2760176956653595, + "step": 3639 + }, + { + "epoch": 1.798714938836031, + "grad_norm": 1.3941723061811673, + "learning_rate": 5.531950403836373e-07, + "loss": 0.2641429901123047, + "step": 3640 + }, + { + "epoch": 1.7992091931298653, + "grad_norm": 1.390874465362023, + "learning_rate": 5.505183050156204e-07, + "loss": 0.2407502382993698, + "step": 3641 + }, + { + "epoch": 1.7997034474236995, + "grad_norm": 1.2164247841450622, + "learning_rate": 5.478478779865682e-07, + "loss": 0.19910940527915955, + "step": 3642 + }, + { + "epoch": 1.8001977017175337, + "grad_norm": 1.4412656091937792, + "learning_rate": 5.451837610792166e-07, + "loss": 0.2716234624385834, + "step": 3643 + }, + { + "epoch": 1.800691956011368, + "grad_norm": 1.3284477963142056, + "learning_rate": 5.42525956072093e-07, + "loss": 0.2784198224544525, + "step": 3644 + }, + { + "epoch": 1.801186210305202, + "grad_norm": 1.3444314874013155, + "learning_rate": 5.398744647395104e-07, + "loss": 0.2277904599905014, + "step": 3645 + }, + { + "epoch": 1.8016804645990363, + "grad_norm": 1.4299842617414134, + "learning_rate": 
5.372292888515684e-07, + "loss": 0.26788002252578735, + "step": 3646 + }, + { + "epoch": 1.8021747188928705, + "grad_norm": 1.3607541160674654, + "learning_rate": 5.345904301741445e-07, + "loss": 0.22452175617218018, + "step": 3647 + }, + { + "epoch": 1.8026689731867047, + "grad_norm": 1.44450101040719, + "learning_rate": 5.319578904689071e-07, + "loss": 0.2337179332971573, + "step": 3648 + }, + { + "epoch": 1.8031632274805387, + "grad_norm": 1.3116281040368842, + "learning_rate": 5.293316714932983e-07, + "loss": 0.2614130973815918, + "step": 3649 + }, + { + "epoch": 1.803657481774373, + "grad_norm": 1.3142722561763884, + "learning_rate": 5.267117750005468e-07, + "loss": 0.2577320635318756, + "step": 3650 + }, + { + "epoch": 1.8041517360682071, + "grad_norm": 1.231846526151871, + "learning_rate": 5.24098202739658e-07, + "loss": 0.2058672308921814, + "step": 3651 + }, + { + "epoch": 1.8046459903620413, + "grad_norm": 1.3970882237865128, + "learning_rate": 5.214909564554138e-07, + "loss": 0.25223514437675476, + "step": 3652 + }, + { + "epoch": 1.8051402446558753, + "grad_norm": 1.3683940041570406, + "learning_rate": 5.188900378883765e-07, + "loss": 0.25651872158050537, + "step": 3653 + }, + { + "epoch": 1.8056344989497095, + "grad_norm": 1.3167902113360206, + "learning_rate": 5.162954487748828e-07, + "loss": 0.257855623960495, + "step": 3654 + }, + { + "epoch": 1.8061287532435437, + "grad_norm": 1.3408137381423195, + "learning_rate": 5.137071908470381e-07, + "loss": 0.22942093014717102, + "step": 3655 + }, + { + "epoch": 1.806623007537378, + "grad_norm": 1.3905585042591802, + "learning_rate": 5.111252658327326e-07, + "loss": 0.25629153847694397, + "step": 3656 + }, + { + "epoch": 1.8071172618312121, + "grad_norm": 1.3417957205977868, + "learning_rate": 5.085496754556207e-07, + "loss": 0.23882299661636353, + "step": 3657 + }, + { + "epoch": 1.8076115161250463, + "grad_norm": 1.3092883951034957, + "learning_rate": 5.059804214351283e-07, + "loss": 0.2323160469532013, + 
"step": 3658 + }, + { + "epoch": 1.8081057704188805, + "grad_norm": 1.318607555394289, + "learning_rate": 5.034175054864531e-07, + "loss": 0.2080869972705841, + "step": 3659 + }, + { + "epoch": 1.8086000247127147, + "grad_norm": 1.476319660825777, + "learning_rate": 5.008609293205624e-07, + "loss": 0.22439511120319366, + "step": 3660 + }, + { + "epoch": 1.809094279006549, + "grad_norm": 1.3639928518895943, + "learning_rate": 4.983106946441885e-07, + "loss": 0.2527809739112854, + "step": 3661 + }, + { + "epoch": 1.8095885333003832, + "grad_norm": 1.181172468164539, + "learning_rate": 4.957668031598328e-07, + "loss": 0.2149294763803482, + "step": 3662 + }, + { + "epoch": 1.8100827875942174, + "grad_norm": 1.3244234520799762, + "learning_rate": 4.932292565657615e-07, + "loss": 0.2471565306186676, + "step": 3663 + }, + { + "epoch": 1.8105770418880514, + "grad_norm": 1.328701941509414, + "learning_rate": 4.906980565560004e-07, + "loss": 0.25820282101631165, + "step": 3664 + }, + { + "epoch": 1.8110712961818856, + "grad_norm": 1.4538113944792308, + "learning_rate": 4.881732048203469e-07, + "loss": 0.2815645933151245, + "step": 3665 + }, + { + "epoch": 1.8115655504757198, + "grad_norm": 1.4078938194960222, + "learning_rate": 4.856547030443559e-07, + "loss": 0.23443330824375153, + "step": 3666 + }, + { + "epoch": 1.812059804769554, + "grad_norm": 1.413689966723704, + "learning_rate": 4.831425529093403e-07, + "loss": 0.2452373206615448, + "step": 3667 + }, + { + "epoch": 1.812554059063388, + "grad_norm": 1.2405057526282826, + "learning_rate": 4.806367560923764e-07, + "loss": 0.21815839409828186, + "step": 3668 + }, + { + "epoch": 1.8130483133572222, + "grad_norm": 1.3418751770168684, + "learning_rate": 4.781373142663003e-07, + "loss": 0.23436316847801208, + "step": 3669 + }, + { + "epoch": 1.8135425676510564, + "grad_norm": 1.277189547676361, + "learning_rate": 4.75644229099701e-07, + "loss": 0.18917132914066315, + "step": 3670 + }, + { + "epoch": 1.8140368219448906, + 
"grad_norm": 1.3842801505047626, + "learning_rate": 4.7315750225692905e-07, + "loss": 0.24570351839065552, + "step": 3671 + }, + { + "epoch": 1.8145310762387248, + "grad_norm": 1.2514343072057177, + "learning_rate": 4.7067713539808543e-07, + "loss": 0.23367956280708313, + "step": 3672 + }, + { + "epoch": 1.815025330532559, + "grad_norm": 1.372723501995688, + "learning_rate": 4.682031301790291e-07, + "loss": 0.24563322961330414, + "step": 3673 + }, + { + "epoch": 1.8155195848263932, + "grad_norm": 1.3552399849082646, + "learning_rate": 4.6573548825137204e-07, + "loss": 0.2425815761089325, + "step": 3674 + }, + { + "epoch": 1.8160138391202274, + "grad_norm": 1.2732667032266225, + "learning_rate": 4.632742112624744e-07, + "loss": 0.2173803597688675, + "step": 3675 + }, + { + "epoch": 1.8165080934140616, + "grad_norm": 1.4674070434763509, + "learning_rate": 4.6081930085544734e-07, + "loss": 0.2665477395057678, + "step": 3676 + }, + { + "epoch": 1.8170023477078958, + "grad_norm": 1.2335396057121188, + "learning_rate": 4.5837075866915994e-07, + "loss": 0.23834756016731262, + "step": 3677 + }, + { + "epoch": 1.81749660200173, + "grad_norm": 1.3614176095599289, + "learning_rate": 4.55928586338219e-07, + "loss": 0.2479294240474701, + "step": 3678 + }, + { + "epoch": 1.8179908562955642, + "grad_norm": 1.370567608566195, + "learning_rate": 4.5349278549298716e-07, + "loss": 0.24136531352996826, + "step": 3679 + }, + { + "epoch": 1.8184851105893982, + "grad_norm": 1.3881148070094378, + "learning_rate": 4.510633577595669e-07, + "loss": 0.24397623538970947, + "step": 3680 + }, + { + "epoch": 1.8189793648832324, + "grad_norm": 1.3189259944629108, + "learning_rate": 4.48640304759812e-07, + "loss": 0.27078694105148315, + "step": 3681 + }, + { + "epoch": 1.8194736191770666, + "grad_norm": 1.5222352072420349, + "learning_rate": 4.4622362811131745e-07, + "loss": 0.2544251084327698, + "step": 3682 + }, + { + "epoch": 1.8199678734709008, + "grad_norm": 1.3696668102162666, + 
"learning_rate": 4.4381332942742384e-07, + "loss": 0.2528873682022095, + "step": 3683 + }, + { + "epoch": 1.8204621277647348, + "grad_norm": 1.470119432024013, + "learning_rate": 4.414094103172084e-07, + "loss": 0.25487592816352844, + "step": 3684 + }, + { + "epoch": 1.820956382058569, + "grad_norm": 1.3872878168023053, + "learning_rate": 4.3901187238549414e-07, + "loss": 0.22061187028884888, + "step": 3685 + }, + { + "epoch": 1.8214506363524032, + "grad_norm": 1.355863796177502, + "learning_rate": 4.366207172328452e-07, + "loss": 0.2793615758419037, + "step": 3686 + }, + { + "epoch": 1.8219448906462374, + "grad_norm": 1.2429295933181803, + "learning_rate": 4.342359464555612e-07, + "loss": 0.2323140949010849, + "step": 3687 + }, + { + "epoch": 1.8224391449400716, + "grad_norm": 1.370663497944958, + "learning_rate": 4.3185756164568104e-07, + "loss": 0.2616409659385681, + "step": 3688 + }, + { + "epoch": 1.8229333992339058, + "grad_norm": 1.3843956978002738, + "learning_rate": 4.294855643909812e-07, + "loss": 0.203874871134758, + "step": 3689 + }, + { + "epoch": 1.82342765352774, + "grad_norm": 1.2289114807067458, + "learning_rate": 4.271199562749717e-07, + "loss": 0.2272878736257553, + "step": 3690 + }, + { + "epoch": 1.8239219078215743, + "grad_norm": 1.338434972419624, + "learning_rate": 4.247607388769004e-07, + "loss": 0.23728047311306, + "step": 3691 + }, + { + "epoch": 1.8244161621154085, + "grad_norm": 1.4750745226923418, + "learning_rate": 4.2240791377174737e-07, + "loss": 0.2570911943912506, + "step": 3692 + }, + { + "epoch": 1.8249104164092427, + "grad_norm": 1.4969254471055817, + "learning_rate": 4.200614825302207e-07, + "loss": 0.24265727400779724, + "step": 3693 + }, + { + "epoch": 1.8254046707030769, + "grad_norm": 1.405819385173928, + "learning_rate": 4.177214467187707e-07, + "loss": 0.24822816252708435, + "step": 3694 + }, + { + "epoch": 1.8258989249969109, + "grad_norm": 1.3218266218091017, + "learning_rate": 4.153878078995677e-07, + "loss": 
0.23382046818733215, + "step": 3695 + }, + { + "epoch": 1.826393179290745, + "grad_norm": 1.4037010093048616, + "learning_rate": 4.130605676305166e-07, + "loss": 0.27590304613113403, + "step": 3696 + }, + { + "epoch": 1.8268874335845793, + "grad_norm": 1.4161501438852775, + "learning_rate": 4.1073972746525026e-07, + "loss": 0.25702038407325745, + "step": 3697 + }, + { + "epoch": 1.8273816878784135, + "grad_norm": 1.488627338365754, + "learning_rate": 4.0842528895312707e-07, + "loss": 0.28980135917663574, + "step": 3698 + }, + { + "epoch": 1.8278759421722475, + "grad_norm": 1.5075437506896323, + "learning_rate": 4.0611725363923435e-07, + "loss": 0.22739271819591522, + "step": 3699 + }, + { + "epoch": 1.8283701964660817, + "grad_norm": 1.4671495030162094, + "learning_rate": 4.038156230643853e-07, + "loss": 0.26396334171295166, + "step": 3700 + }, + { + "epoch": 1.8288644507599159, + "grad_norm": 1.5855861974203058, + "learning_rate": 4.015203987651106e-07, + "loss": 0.25548964738845825, + "step": 3701 + }, + { + "epoch": 1.82935870505375, + "grad_norm": 1.3315259515817186, + "learning_rate": 3.992315822736725e-07, + "loss": 0.22227105498313904, + "step": 3702 + }, + { + "epoch": 1.8298529593475843, + "grad_norm": 1.445413897274288, + "learning_rate": 3.969491751180543e-07, + "loss": 0.30854254961013794, + "step": 3703 + }, + { + "epoch": 1.8303472136414185, + "grad_norm": 1.4678349464130562, + "learning_rate": 3.946731788219538e-07, + "loss": 0.27471429109573364, + "step": 3704 + }, + { + "epoch": 1.8308414679352527, + "grad_norm": 1.334822235698922, + "learning_rate": 3.924035949047955e-07, + "loss": 0.2317768633365631, + "step": 3705 + }, + { + "epoch": 1.831335722229087, + "grad_norm": 1.4197098897896443, + "learning_rate": 3.901404248817231e-07, + "loss": 0.2450723946094513, + "step": 3706 + }, + { + "epoch": 1.8318299765229211, + "grad_norm": 1.4676009490842072, + "learning_rate": 3.878836702635935e-07, + "loss": 0.2428039014339447, + "step": 3707 + }, + { + 
"epoch": 1.8323242308167553, + "grad_norm": 1.4376208196933993, + "learning_rate": 3.856333325569861e-07, + "loss": 0.27869629859924316, + "step": 3708 + }, + { + "epoch": 1.8328184851105895, + "grad_norm": 1.2808253694997749, + "learning_rate": 3.8338941326419353e-07, + "loss": 0.21661749482154846, + "step": 3709 + }, + { + "epoch": 1.8333127394044237, + "grad_norm": 1.3452610575891626, + "learning_rate": 3.8115191388322206e-07, + "loss": 0.2655249834060669, + "step": 3710 + }, + { + "epoch": 1.8338069936982577, + "grad_norm": 1.3643896556477109, + "learning_rate": 3.7892083590779784e-07, + "loss": 0.2281903475522995, + "step": 3711 + }, + { + "epoch": 1.834301247992092, + "grad_norm": 1.492937654145658, + "learning_rate": 3.7669618082735504e-07, + "loss": 0.24545446038246155, + "step": 3712 + }, + { + "epoch": 1.8347955022859261, + "grad_norm": 1.2788794377367898, + "learning_rate": 3.7447795012704237e-07, + "loss": 0.24749556183815002, + "step": 3713 + }, + { + "epoch": 1.8352897565797601, + "grad_norm": 1.4606135919595513, + "learning_rate": 3.722661452877163e-07, + "loss": 0.26234689354896545, + "step": 3714 + }, + { + "epoch": 1.8357840108735943, + "grad_norm": 1.3697239858165842, + "learning_rate": 3.700607677859491e-07, + "loss": 0.21348389983177185, + "step": 3715 + }, + { + "epoch": 1.8362782651674285, + "grad_norm": 1.3198403259649356, + "learning_rate": 3.6786181909401864e-07, + "loss": 0.2527744770050049, + "step": 3716 + }, + { + "epoch": 1.8367725194612627, + "grad_norm": 1.3153305717810528, + "learning_rate": 3.6566930067991056e-07, + "loss": 0.2175026535987854, + "step": 3717 + }, + { + "epoch": 1.837266773755097, + "grad_norm": 1.3795015677920492, + "learning_rate": 3.6348321400731967e-07, + "loss": 0.2847272753715515, + "step": 3718 + }, + { + "epoch": 1.8377610280489312, + "grad_norm": 1.4885049894439106, + "learning_rate": 3.613035605356463e-07, + "loss": 0.2549072504043579, + "step": 3719 + }, + { + "epoch": 1.8382552823427654, + "grad_norm": 
1.3444222427486383, + "learning_rate": 3.591303417199965e-07, + "loss": 0.24534013867378235, + "step": 3720 + }, + { + "epoch": 1.8387495366365996, + "grad_norm": 1.461602538702394, + "learning_rate": 3.5696355901117865e-07, + "loss": 0.25336408615112305, + "step": 3721 + }, + { + "epoch": 1.8392437909304338, + "grad_norm": 1.4932038589381658, + "learning_rate": 3.548032138557056e-07, + "loss": 0.2787632346153259, + "step": 3722 + }, + { + "epoch": 1.839738045224268, + "grad_norm": 1.3687827308256, + "learning_rate": 3.5264930769579595e-07, + "loss": 0.22364875674247742, + "step": 3723 + }, + { + "epoch": 1.8402322995181022, + "grad_norm": 1.509493433022075, + "learning_rate": 3.5050184196936285e-07, + "loss": 0.2526230216026306, + "step": 3724 + }, + { + "epoch": 1.8407265538119364, + "grad_norm": 1.449998297788816, + "learning_rate": 3.483608181100262e-07, + "loss": 0.2412932962179184, + "step": 3725 + }, + { + "epoch": 1.8412208081057704, + "grad_norm": 1.4100243345912178, + "learning_rate": 3.462262375471026e-07, + "loss": 0.28693705797195435, + "step": 3726 + }, + { + "epoch": 1.8417150623996046, + "grad_norm": 1.4369299703462226, + "learning_rate": 3.4409810170560667e-07, + "loss": 0.2600281834602356, + "step": 3727 + }, + { + "epoch": 1.8422093166934388, + "grad_norm": 1.3702328145360616, + "learning_rate": 3.4197641200625185e-07, + "loss": 0.24885150790214539, + "step": 3728 + }, + { + "epoch": 1.842703570987273, + "grad_norm": 1.476451776245579, + "learning_rate": 3.398611698654497e-07, + "loss": 0.27185115218162537, + "step": 3729 + }, + { + "epoch": 1.843197825281107, + "grad_norm": 1.6779196665373166, + "learning_rate": 3.377523766953006e-07, + "loss": 0.2999323010444641, + "step": 3730 + }, + { + "epoch": 1.8436920795749412, + "grad_norm": 1.3755033406487114, + "learning_rate": 3.356500339036106e-07, + "loss": 0.22807806730270386, + "step": 3731 + }, + { + "epoch": 1.8441863338687754, + "grad_norm": 1.4727836521575108, + "learning_rate": 
3.3355414289387155e-07, + "loss": 0.23006726801395416, + "step": 3732 + }, + { + "epoch": 1.8446805881626096, + "grad_norm": 1.4892072813513704, + "learning_rate": 3.314647050652686e-07, + "loss": 0.25261276960372925, + "step": 3733 + }, + { + "epoch": 1.8451748424564438, + "grad_norm": 1.3741598151970273, + "learning_rate": 3.293817218126827e-07, + "loss": 0.2484148144721985, + "step": 3734 + }, + { + "epoch": 1.845669096750278, + "grad_norm": 1.2679669997107472, + "learning_rate": 3.273051945266836e-07, + "loss": 0.2472834438085556, + "step": 3735 + }, + { + "epoch": 1.8461633510441122, + "grad_norm": 1.16756829401485, + "learning_rate": 3.2523512459352923e-07, + "loss": 0.20510706305503845, + "step": 3736 + }, + { + "epoch": 1.8466576053379464, + "grad_norm": 1.292644423038628, + "learning_rate": 3.231715133951707e-07, + "loss": 0.2331993281841278, + "step": 3737 + }, + { + "epoch": 1.8471518596317806, + "grad_norm": 1.4584815860954135, + "learning_rate": 3.211143623092461e-07, + "loss": 0.2704228162765503, + "step": 3738 + }, + { + "epoch": 1.8476461139256148, + "grad_norm": 1.4579018041488718, + "learning_rate": 3.190636727090768e-07, + "loss": 0.2514714002609253, + "step": 3739 + }, + { + "epoch": 1.848140368219449, + "grad_norm": 1.258977256920419, + "learning_rate": 3.170194459636777e-07, + "loss": 0.2396089732646942, + "step": 3740 + }, + { + "epoch": 1.848634622513283, + "grad_norm": 1.4139144003983488, + "learning_rate": 3.149816834377428e-07, + "loss": 0.266484797000885, + "step": 3741 + }, + { + "epoch": 1.8491288768071172, + "grad_norm": 1.338105672337281, + "learning_rate": 3.129503864916539e-07, + "loss": 0.24549749493598938, + "step": 3742 + }, + { + "epoch": 1.8496231311009514, + "grad_norm": 1.6902480251834826, + "learning_rate": 3.1092555648147615e-07, + "loss": 0.2659090757369995, + "step": 3743 + }, + { + "epoch": 1.8501173853947857, + "grad_norm": 1.4018081288366548, + "learning_rate": 3.0890719475895615e-07, + "loss": 0.2756732702255249, + 
"step": 3744 + }, + { + "epoch": 1.8506116396886196, + "grad_norm": 1.3509953718874834, + "learning_rate": 3.068953026715238e-07, + "loss": 0.2568710148334503, + "step": 3745 + }, + { + "epoch": 1.8511058939824538, + "grad_norm": 1.3512798325752944, + "learning_rate": 3.048898815622914e-07, + "loss": 0.2255566120147705, + "step": 3746 + }, + { + "epoch": 1.851600148276288, + "grad_norm": 1.309385732750396, + "learning_rate": 3.028909327700458e-07, + "loss": 0.2083941102027893, + "step": 3747 + }, + { + "epoch": 1.8520944025701223, + "grad_norm": 1.2287507621351796, + "learning_rate": 3.0089845762926063e-07, + "loss": 0.20739290118217468, + "step": 3748 + }, + { + "epoch": 1.8525886568639565, + "grad_norm": 1.2356251229389228, + "learning_rate": 2.989124574700819e-07, + "loss": 0.21835210919380188, + "step": 3749 + }, + { + "epoch": 1.8530829111577907, + "grad_norm": 1.312598409351232, + "learning_rate": 2.969329336183335e-07, + "loss": 0.2170596569776535, + "step": 3750 + }, + { + "epoch": 1.8535771654516249, + "grad_norm": 1.3990932569701935, + "learning_rate": 2.949598873955184e-07, + "loss": 0.23584111034870148, + "step": 3751 + }, + { + "epoch": 1.854071419745459, + "grad_norm": 1.5531646127161125, + "learning_rate": 2.9299332011881623e-07, + "loss": 0.2690342664718628, + "step": 3752 + }, + { + "epoch": 1.8545656740392933, + "grad_norm": 1.2634424740078676, + "learning_rate": 2.9103323310107566e-07, + "loss": 0.2499091923236847, + "step": 3753 + }, + { + "epoch": 1.8550599283331275, + "grad_norm": 1.417744173198578, + "learning_rate": 2.8907962765082567e-07, + "loss": 0.23112377524375916, + "step": 3754 + }, + { + "epoch": 1.8555541826269617, + "grad_norm": 1.375590332914505, + "learning_rate": 2.8713250507226285e-07, + "loss": 0.25203657150268555, + "step": 3755 + }, + { + "epoch": 1.856048436920796, + "grad_norm": 1.4015552448571456, + "learning_rate": 2.8519186666526086e-07, + "loss": 0.2468508780002594, + "step": 3756 + }, + { + "epoch": 1.85654269121463, 
+ "grad_norm": 1.427563584784084, + "learning_rate": 2.8325771372536e-07, + "loss": 0.22745928168296814, + "step": 3757 + }, + { + "epoch": 1.857036945508464, + "grad_norm": 1.2932963376428803, + "learning_rate": 2.8133004754377525e-07, + "loss": 0.23090660572052002, + "step": 3758 + }, + { + "epoch": 1.8575311998022983, + "grad_norm": 1.420318152152914, + "learning_rate": 2.7940886940738707e-07, + "loss": 0.27513352036476135, + "step": 3759 + }, + { + "epoch": 1.8580254540961325, + "grad_norm": 1.4517333399175874, + "learning_rate": 2.774941805987474e-07, + "loss": 0.25791019201278687, + "step": 3760 + }, + { + "epoch": 1.8585197083899665, + "grad_norm": 1.523404531013776, + "learning_rate": 2.75585982396076e-07, + "loss": 0.2703961730003357, + "step": 3761 + }, + { + "epoch": 1.8590139626838007, + "grad_norm": 1.4198437134006967, + "learning_rate": 2.736842760732561e-07, + "loss": 0.2557608485221863, + "step": 3762 + }, + { + "epoch": 1.859508216977635, + "grad_norm": 1.4276231211370918, + "learning_rate": 2.717890628998421e-07, + "loss": 0.26276740431785583, + "step": 3763 + }, + { + "epoch": 1.8600024712714691, + "grad_norm": 1.3830597360775128, + "learning_rate": 2.699003441410508e-07, + "loss": 0.3033446967601776, + "step": 3764 + }, + { + "epoch": 1.8604967255653033, + "grad_norm": 1.3975518004533982, + "learning_rate": 2.680181210577637e-07, + "loss": 0.2513597905635834, + "step": 3765 + }, + { + "epoch": 1.8609909798591375, + "grad_norm": 1.2527716887935596, + "learning_rate": 2.661423949065267e-07, + "loss": 0.22935059666633606, + "step": 3766 + }, + { + "epoch": 1.8614852341529717, + "grad_norm": 1.5028347517247218, + "learning_rate": 2.6427316693954596e-07, + "loss": 0.2585369348526001, + "step": 3767 + }, + { + "epoch": 1.861979488446806, + "grad_norm": 1.4129565265857094, + "learning_rate": 2.6241043840469104e-07, + "loss": 0.25701645016670227, + "step": 3768 + }, + { + "epoch": 1.8624737427406401, + "grad_norm": 1.304405538262163, + "learning_rate": 
2.605542105454961e-07, + "loss": 0.24622182548046112, + "step": 3769 + }, + { + "epoch": 1.8629679970344744, + "grad_norm": 1.380891732165765, + "learning_rate": 2.5870448460114994e-07, + "loss": 0.2650758624076843, + "step": 3770 + }, + { + "epoch": 1.8634622513283086, + "grad_norm": 1.4721649336836553, + "learning_rate": 2.568612618065036e-07, + "loss": 0.2364269644021988, + "step": 3771 + }, + { + "epoch": 1.8639565056221425, + "grad_norm": 1.2217358212004363, + "learning_rate": 2.5502454339206617e-07, + "loss": 0.23226915299892426, + "step": 3772 + }, + { + "epoch": 1.8644507599159768, + "grad_norm": 1.3407554644381927, + "learning_rate": 2.5319433058400565e-07, + "loss": 0.23077306151390076, + "step": 3773 + }, + { + "epoch": 1.864945014209811, + "grad_norm": 1.289395146095016, + "learning_rate": 2.5137062460414476e-07, + "loss": 0.23707103729248047, + "step": 3774 + }, + { + "epoch": 1.8654392685036452, + "grad_norm": 1.3571808886592325, + "learning_rate": 2.4955342666996505e-07, + "loss": 0.268571138381958, + "step": 3775 + }, + { + "epoch": 1.8659335227974791, + "grad_norm": 1.4298616373621023, + "learning_rate": 2.4774273799459847e-07, + "loss": 0.21469517052173615, + "step": 3776 + }, + { + "epoch": 1.8664277770913134, + "grad_norm": 1.302386517113681, + "learning_rate": 2.45938559786838e-07, + "loss": 0.2513999938964844, + "step": 3777 + }, + { + "epoch": 1.8669220313851476, + "grad_norm": 1.2688339559395354, + "learning_rate": 2.44140893251128e-07, + "loss": 0.23660680651664734, + "step": 3778 + }, + { + "epoch": 1.8674162856789818, + "grad_norm": 1.499995655954345, + "learning_rate": 2.423497395875618e-07, + "loss": 0.24594557285308838, + "step": 3779 + }, + { + "epoch": 1.867910539972816, + "grad_norm": 1.4315211319459857, + "learning_rate": 2.405650999918896e-07, + "loss": 0.2725435793399811, + "step": 3780 + }, + { + "epoch": 1.8684047942666502, + "grad_norm": 1.3565937935517103, + "learning_rate": 2.3878697565551167e-07, + "loss": 
0.25718316435813904, + "step": 3781 + }, + { + "epoch": 1.8688990485604844, + "grad_norm": 1.3523272274009415, + "learning_rate": 2.3701536776547851e-07, + "loss": 0.2546181082725525, + "step": 3782 + }, + { + "epoch": 1.8693933028543186, + "grad_norm": 1.1875597307843324, + "learning_rate": 2.3525027750448959e-07, + "loss": 0.22146770358085632, + "step": 3783 + }, + { + "epoch": 1.8698875571481528, + "grad_norm": 1.5616036933474096, + "learning_rate": 2.3349170605089456e-07, + "loss": 0.23873519897460938, + "step": 3784 + }, + { + "epoch": 1.870381811441987, + "grad_norm": 1.3056198220614723, + "learning_rate": 2.3173965457868875e-07, + "loss": 0.2530808746814728, + "step": 3785 + }, + { + "epoch": 1.8708760657358212, + "grad_norm": 1.5174642956273923, + "learning_rate": 2.2999412425751987e-07, + "loss": 0.21616236865520477, + "step": 3786 + }, + { + "epoch": 1.8713703200296554, + "grad_norm": 1.3867713509711206, + "learning_rate": 2.2825511625267583e-07, + "loss": 0.21596969664096832, + "step": 3787 + }, + { + "epoch": 1.8718645743234894, + "grad_norm": 1.4557650561795843, + "learning_rate": 2.265226317250957e-07, + "loss": 0.25873616337776184, + "step": 3788 + }, + { + "epoch": 1.8723588286173236, + "grad_norm": 1.3108065941801126, + "learning_rate": 2.247966718313599e-07, + "loss": 0.21096865832805634, + "step": 3789 + }, + { + "epoch": 1.8728530829111578, + "grad_norm": 1.374596799099242, + "learning_rate": 2.230772377236956e-07, + "loss": 0.2159111499786377, + "step": 3790 + }, + { + "epoch": 1.8733473372049918, + "grad_norm": 1.3658642346441578, + "learning_rate": 2.213643305499724e-07, + "loss": 0.2264566719532013, + "step": 3791 + }, + { + "epoch": 1.873841591498826, + "grad_norm": 1.2529368730648867, + "learning_rate": 2.1965795145370338e-07, + "loss": 0.216034397482872, + "step": 3792 + }, + { + "epoch": 1.8743358457926602, + "grad_norm": 1.2144868387665828, + "learning_rate": 2.1795810157404063e-07, + "loss": 0.22257745265960693, + "step": 3793 + }, + { 
+ "epoch": 1.8748301000864944, + "grad_norm": 1.5075158608293073, + "learning_rate": 2.1626478204578082e-07, + "loss": 0.2569161653518677, + "step": 3794 + }, + { + "epoch": 1.8753243543803286, + "grad_norm": 1.3028902539101006, + "learning_rate": 2.1457799399936087e-07, + "loss": 0.24172556400299072, + "step": 3795 + }, + { + "epoch": 1.8758186086741628, + "grad_norm": 1.4100197142967315, + "learning_rate": 2.128977385608555e-07, + "loss": 0.25539106130599976, + "step": 3796 + }, + { + "epoch": 1.876312862967997, + "grad_norm": 1.3564195764364628, + "learning_rate": 2.1122401685197747e-07, + "loss": 0.23766650259494781, + "step": 3797 + }, + { + "epoch": 1.8768071172618312, + "grad_norm": 2.0847437292387516, + "learning_rate": 2.095568299900841e-07, + "loss": 0.24102288484573364, + "step": 3798 + }, + { + "epoch": 1.8773013715556655, + "grad_norm": 1.4163898812472968, + "learning_rate": 2.0789617908816063e-07, + "loss": 0.25168395042419434, + "step": 3799 + }, + { + "epoch": 1.8777956258494997, + "grad_norm": 1.2853968722580162, + "learning_rate": 2.0624206525483582e-07, + "loss": 0.23417149484157562, + "step": 3800 + }, + { + "epoch": 1.8782898801433339, + "grad_norm": 1.4002834822702614, + "learning_rate": 2.04594489594373e-07, + "loss": 0.2875264883041382, + "step": 3801 + }, + { + "epoch": 1.878784134437168, + "grad_norm": 1.3714454637927955, + "learning_rate": 2.0295345320667014e-07, + "loss": 0.24828693270683289, + "step": 3802 + }, + { + "epoch": 1.879278388731002, + "grad_norm": 1.3521250596424406, + "learning_rate": 2.013189571872587e-07, + "loss": 0.23279064893722534, + "step": 3803 + }, + { + "epoch": 1.8797726430248363, + "grad_norm": 1.1425181629308492, + "learning_rate": 1.996910026273058e-07, + "loss": 0.2099420577287674, + "step": 3804 + }, + { + "epoch": 1.8802668973186705, + "grad_norm": 1.346362344532125, + "learning_rate": 1.9806959061360985e-07, + "loss": 0.25043174624443054, + "step": 3805 + }, + { + "epoch": 1.8807611516125047, + 
"grad_norm": 1.3680517059526944, + "learning_rate": 1.9645472222860286e-07, + "loss": 0.2606011927127838, + "step": 3806 + }, + { + "epoch": 1.8812554059063387, + "grad_norm": 1.2606250431650987, + "learning_rate": 1.948463985503468e-07, + "loss": 0.22487565875053406, + "step": 3807 + }, + { + "epoch": 1.8817496602001729, + "grad_norm": 1.6823729371263936, + "learning_rate": 1.9324462065253735e-07, + "loss": 0.29611343145370483, + "step": 3808 + }, + { + "epoch": 1.882243914494007, + "grad_norm": 1.282763458334529, + "learning_rate": 1.9164938960449685e-07, + "loss": 0.2301706224679947, + "step": 3809 + }, + { + "epoch": 1.8827381687878413, + "grad_norm": 1.319243063789466, + "learning_rate": 1.9006070647118015e-07, + "loss": 0.2306794822216034, + "step": 3810 + }, + { + "epoch": 1.8832324230816755, + "grad_norm": 1.4208055299495237, + "learning_rate": 1.884785723131688e-07, + "loss": 0.2588786482810974, + "step": 3811 + }, + { + "epoch": 1.8837266773755097, + "grad_norm": 1.527285475263959, + "learning_rate": 1.8690298818667463e-07, + "loss": 0.2795346677303314, + "step": 3812 + }, + { + "epoch": 1.884220931669344, + "grad_norm": 1.2499989201376016, + "learning_rate": 1.853339551435318e-07, + "loss": 0.2313271164894104, + "step": 3813 + }, + { + "epoch": 1.884715185963178, + "grad_norm": 1.4803115521216077, + "learning_rate": 1.8377147423120467e-07, + "loss": 0.22814632952213287, + "step": 3814 + }, + { + "epoch": 1.8852094402570123, + "grad_norm": 1.3259243101199787, + "learning_rate": 1.822155464927866e-07, + "loss": 0.2605836093425751, + "step": 3815 + }, + { + "epoch": 1.8857036945508465, + "grad_norm": 1.3976508324913761, + "learning_rate": 1.8066617296699007e-07, + "loss": 0.23902952671051025, + "step": 3816 + }, + { + "epoch": 1.8861979488446807, + "grad_norm": 1.290435692515394, + "learning_rate": 1.7912335468815545e-07, + "loss": 0.24895761907100677, + "step": 3817 + }, + { + "epoch": 1.8866922031385147, + "grad_norm": 1.4446135232841222, + 
"learning_rate": 1.7758709268624664e-07, + "loss": 0.24108648300170898, + "step": 3818 + }, + { + "epoch": 1.887186457432349, + "grad_norm": 1.4071508146495701, + "learning_rate": 1.7605738798684767e-07, + "loss": 0.2600073516368866, + "step": 3819 + }, + { + "epoch": 1.8876807117261831, + "grad_norm": 1.3261487318829528, + "learning_rate": 1.745342416111706e-07, + "loss": 0.21564190089702606, + "step": 3820 + }, + { + "epoch": 1.8881749660200173, + "grad_norm": 1.4577577895280622, + "learning_rate": 1.7301765457604647e-07, + "loss": 0.24080556631088257, + "step": 3821 + }, + { + "epoch": 1.8886692203138513, + "grad_norm": 1.316642170468449, + "learning_rate": 1.7150762789392316e-07, + "loss": 0.22631056606769562, + "step": 3822 + }, + { + "epoch": 1.8891634746076855, + "grad_norm": 1.4341533325292704, + "learning_rate": 1.7000416257287654e-07, + "loss": 0.26355087757110596, + "step": 3823 + }, + { + "epoch": 1.8896577289015197, + "grad_norm": 1.387410149780388, + "learning_rate": 1.685072596165982e-07, + "loss": 0.248369500041008, + "step": 3824 + }, + { + "epoch": 1.890151983195354, + "grad_norm": 1.4331472853704903, + "learning_rate": 1.670169200243976e-07, + "loss": 0.2789249122142792, + "step": 3825 + }, + { + "epoch": 1.8906462374891881, + "grad_norm": 1.2052406993380367, + "learning_rate": 1.6553314479120453e-07, + "loss": 0.22493675351142883, + "step": 3826 + }, + { + "epoch": 1.8911404917830223, + "grad_norm": 1.2074956449276386, + "learning_rate": 1.6405593490756766e-07, + "loss": 0.21274074912071228, + "step": 3827 + }, + { + "epoch": 1.8916347460768566, + "grad_norm": 1.3986179942656674, + "learning_rate": 1.6258529135964928e-07, + "loss": 0.2591193914413452, + "step": 3828 + }, + { + "epoch": 1.8921290003706908, + "grad_norm": 1.5077061888652343, + "learning_rate": 1.6112121512923075e-07, + "loss": 0.2791387140750885, + "step": 3829 + }, + { + "epoch": 1.892623254664525, + "grad_norm": 1.449596307066075, + "learning_rate": 1.5966370719371015e-07, + 
"loss": 0.2840545177459717, + "step": 3830 + }, + { + "epoch": 1.8931175089583592, + "grad_norm": 1.538114321399184, + "learning_rate": 1.582127685260948e-07, + "loss": 0.2563555836677551, + "step": 3831 + }, + { + "epoch": 1.8936117632521934, + "grad_norm": 1.2897284655116197, + "learning_rate": 1.5676840009501538e-07, + "loss": 0.22912704944610596, + "step": 3832 + }, + { + "epoch": 1.8941060175460276, + "grad_norm": 1.3733822665309192, + "learning_rate": 1.5533060286470837e-07, + "loss": 0.25490787625312805, + "step": 3833 + }, + { + "epoch": 1.8946002718398616, + "grad_norm": 1.2282031018618578, + "learning_rate": 1.5389937779502818e-07, + "loss": 0.21826709806919098, + "step": 3834 + }, + { + "epoch": 1.8950945261336958, + "grad_norm": 1.303626845787231, + "learning_rate": 1.524747258414394e-07, + "loss": 0.2292749583721161, + "step": 3835 + }, + { + "epoch": 1.89558878042753, + "grad_norm": 1.3359905611934206, + "learning_rate": 1.5105664795501908e-07, + "loss": 0.24652332067489624, + "step": 3836 + }, + { + "epoch": 1.8960830347213642, + "grad_norm": 1.3777956922677133, + "learning_rate": 1.4964514508245652e-07, + "loss": 0.25154706835746765, + "step": 3837 + }, + { + "epoch": 1.8965772890151982, + "grad_norm": 1.3722697572324272, + "learning_rate": 1.482402181660525e-07, + "loss": 0.2414158582687378, + "step": 3838 + }, + { + "epoch": 1.8970715433090324, + "grad_norm": 1.3960215733148371, + "learning_rate": 1.4684186814371225e-07, + "loss": 0.22421908378601074, + "step": 3839 + }, + { + "epoch": 1.8975657976028666, + "grad_norm": 1.3337706977662172, + "learning_rate": 1.4545009594895687e-07, + "loss": 0.2506029009819031, + "step": 3840 + }, + { + "epoch": 1.8980600518967008, + "grad_norm": 1.239516400526973, + "learning_rate": 1.440649025109142e-07, + "loss": 0.2011726200580597, + "step": 3841 + }, + { + "epoch": 1.898554306190535, + "grad_norm": 1.5242598019660087, + "learning_rate": 1.4268628875431677e-07, + "loss": 0.27702796459198, + "step": 3842 + }, + 
{ + "epoch": 1.8990485604843692, + "grad_norm": 1.236260659855922, + "learning_rate": 1.413142555995095e-07, + "loss": 0.23884715139865875, + "step": 3843 + }, + { + "epoch": 1.8995428147782034, + "grad_norm": 1.2385068593263413, + "learning_rate": 1.3994880396244304e-07, + "loss": 0.2191702425479889, + "step": 3844 + }, + { + "epoch": 1.9000370690720376, + "grad_norm": 1.3532676134331167, + "learning_rate": 1.385899347546704e-07, + "loss": 0.25425833463668823, + "step": 3845 + }, + { + "epoch": 1.9005313233658718, + "grad_norm": 1.3452712776781028, + "learning_rate": 1.37237648883356e-07, + "loss": 0.23355990648269653, + "step": 3846 + }, + { + "epoch": 1.901025577659706, + "grad_norm": 1.200878562022238, + "learning_rate": 1.3589194725126542e-07, + "loss": 0.2079685628414154, + "step": 3847 + }, + { + "epoch": 1.9015198319535402, + "grad_norm": 1.380798956497921, + "learning_rate": 1.3455283075676895e-07, + "loss": 0.25126928091049194, + "step": 3848 + }, + { + "epoch": 1.9020140862473742, + "grad_norm": 1.3306751541769635, + "learning_rate": 1.332203002938437e-07, + "loss": 0.2608864903450012, + "step": 3849 + }, + { + "epoch": 1.9025083405412084, + "grad_norm": 1.3536846944777874, + "learning_rate": 1.3189435675206697e-07, + "loss": 0.27048414945602417, + "step": 3850 + }, + { + "epoch": 1.9030025948350426, + "grad_norm": 1.3873264194773522, + "learning_rate": 1.3057500101661846e-07, + "loss": 0.24350577592849731, + "step": 3851 + }, + { + "epoch": 1.9034968491288768, + "grad_norm": 1.5060374095399143, + "learning_rate": 1.2926223396828363e-07, + "loss": 0.23283880949020386, + "step": 3852 + }, + { + "epoch": 1.9039911034227108, + "grad_norm": 1.3722502195381412, + "learning_rate": 1.2795605648344477e-07, + "loss": 0.23332493007183075, + "step": 3853 + }, + { + "epoch": 1.904485357716545, + "grad_norm": 1.2805992535782373, + "learning_rate": 1.2665646943408882e-07, + "loss": 0.19833901524543762, + "step": 3854 + }, + { + "epoch": 1.9049796120103792, + 
"grad_norm": 1.316108497317141, + "learning_rate": 1.2536347368780066e-07, + "loss": 0.23650333285331726, + "step": 3855 + }, + { + "epoch": 1.9054738663042134, + "grad_norm": 1.1749486485284195, + "learning_rate": 1.240770701077665e-07, + "loss": 0.20151859521865845, + "step": 3856 + }, + { + "epoch": 1.9059681205980477, + "grad_norm": 1.4620220273758984, + "learning_rate": 1.2279725955277044e-07, + "loss": 0.32347559928894043, + "step": 3857 + }, + { + "epoch": 1.9064623748918819, + "grad_norm": 1.2726582104041342, + "learning_rate": 1.215240428771969e-07, + "loss": 0.25937923789024353, + "step": 3858 + }, + { + "epoch": 1.906956629185716, + "grad_norm": 1.6959402751075685, + "learning_rate": 1.2025742093102477e-07, + "loss": 0.2648822069168091, + "step": 3859 + }, + { + "epoch": 1.9074508834795503, + "grad_norm": 1.4639245582336404, + "learning_rate": 1.1899739455983327e-07, + "loss": 0.27612054347991943, + "step": 3860 + }, + { + "epoch": 1.9079451377733845, + "grad_norm": 1.32342317481008, + "learning_rate": 1.1774396460480064e-07, + "loss": 0.2204264998435974, + "step": 3861 + }, + { + "epoch": 1.9084393920672187, + "grad_norm": 1.4448526349141402, + "learning_rate": 1.164971319026964e-07, + "loss": 0.2719968557357788, + "step": 3862 + }, + { + "epoch": 1.908933646361053, + "grad_norm": 1.3288093626980793, + "learning_rate": 1.1525689728588807e-07, + "loss": 0.2308243364095688, + "step": 3863 + }, + { + "epoch": 1.909427900654887, + "grad_norm": 1.405242953564276, + "learning_rate": 1.1402326158234e-07, + "loss": 0.23281638324260712, + "step": 3864 + }, + { + "epoch": 1.909922154948721, + "grad_norm": 1.553800687505842, + "learning_rate": 1.127962256156101e-07, + "loss": 0.26273444294929504, + "step": 3865 + }, + { + "epoch": 1.9104164092425553, + "grad_norm": 1.3311046226223713, + "learning_rate": 1.1157579020484755e-07, + "loss": 0.26783496141433716, + "step": 3866 + }, + { + "epoch": 1.9109106635363895, + "grad_norm": 1.4482920311066827, + "learning_rate": 
1.1036195616480061e-07, + "loss": 0.2575075626373291, + "step": 3867 + }, + { + "epoch": 1.9114049178302237, + "grad_norm": 1.3313207733281058, + "learning_rate": 1.0915472430580443e-07, + "loss": 0.24802085757255554, + "step": 3868 + }, + { + "epoch": 1.9118991721240577, + "grad_norm": 1.230518560175702, + "learning_rate": 1.0795409543379099e-07, + "loss": 0.22017821669578552, + "step": 3869 + }, + { + "epoch": 1.912393426417892, + "grad_norm": 1.3804831257002024, + "learning_rate": 1.0676007035028579e-07, + "loss": 0.2525743246078491, + "step": 3870 + }, + { + "epoch": 1.912887680711726, + "grad_norm": 1.5674388988470875, + "learning_rate": 1.05572649852399e-07, + "loss": 0.26704782247543335, + "step": 3871 + }, + { + "epoch": 1.9133819350055603, + "grad_norm": 3.430480948746706, + "learning_rate": 1.0439183473283654e-07, + "loss": 0.25393134355545044, + "step": 3872 + }, + { + "epoch": 1.9138761892993945, + "grad_norm": 1.4465108879454651, + "learning_rate": 1.0321762577989448e-07, + "loss": 0.27266988158226013, + "step": 3873 + }, + { + "epoch": 1.9143704435932287, + "grad_norm": 1.366912603525092, + "learning_rate": 1.0205002377745799e-07, + "loss": 0.2694425582885742, + "step": 3874 + }, + { + "epoch": 1.914864697887063, + "grad_norm": 1.394500016346508, + "learning_rate": 1.0088902950500023e-07, + "loss": 0.28820598125457764, + "step": 3875 + }, + { + "epoch": 1.9153589521808971, + "grad_norm": 1.3050023577266547, + "learning_rate": 9.973464373758679e-08, + "loss": 0.2194051444530487, + "step": 3876 + }, + { + "epoch": 1.9158532064747313, + "grad_norm": 1.3831603392475145, + "learning_rate": 9.858686724586675e-08, + "loss": 0.25639402866363525, + "step": 3877 + }, + { + "epoch": 1.9163474607685655, + "grad_norm": 1.2744346736321277, + "learning_rate": 9.744570079608051e-08, + "loss": 0.23420584201812744, + "step": 3878 + }, + { + "epoch": 1.9168417150623998, + "grad_norm": 1.38639151316596, + "learning_rate": 9.631114515005425e-08, + "loss": 
0.2514578700065613, + "step": 3879 + }, + { + "epoch": 1.9173359693562337, + "grad_norm": 1.296540814966686, + "learning_rate": 9.518320106520096e-08, + "loss": 0.2223532646894455, + "step": 3880 + }, + { + "epoch": 1.917830223650068, + "grad_norm": 1.367450022954602, + "learning_rate": 9.406186929451943e-08, + "loss": 0.21725934743881226, + "step": 3881 + }, + { + "epoch": 1.9183244779439022, + "grad_norm": 1.2939049219304557, + "learning_rate": 9.294715058659531e-08, + "loss": 0.2081519365310669, + "step": 3882 + }, + { + "epoch": 1.9188187322377364, + "grad_norm": 1.4148048553245687, + "learning_rate": 9.183904568559998e-08, + "loss": 0.23683780431747437, + "step": 3883 + }, + { + "epoch": 1.9193129865315703, + "grad_norm": 1.3217345576155297, + "learning_rate": 9.073755533128725e-08, + "loss": 0.26095467805862427, + "step": 3884 + }, + { + "epoch": 1.9198072408254045, + "grad_norm": 1.253461281568054, + "learning_rate": 8.964268025899558e-08, + "loss": 0.24427568912506104, + "step": 3885 + }, + { + "epoch": 1.9203014951192388, + "grad_norm": 1.3603609343742546, + "learning_rate": 8.855442119964919e-08, + "loss": 0.23549365997314453, + "step": 3886 + }, + { + "epoch": 1.920795749413073, + "grad_norm": 1.4769071310965274, + "learning_rate": 8.74727788797547e-08, + "loss": 0.2645740807056427, + "step": 3887 + }, + { + "epoch": 1.9212900037069072, + "grad_norm": 1.3315198325383535, + "learning_rate": 8.639775402139894e-08, + "loss": 0.22890612483024597, + "step": 3888 + }, + { + "epoch": 1.9217842580007414, + "grad_norm": 1.4439303401955232, + "learning_rate": 8.532934734225451e-08, + "loss": 0.23417067527770996, + "step": 3889 + }, + { + "epoch": 1.9222785122945756, + "grad_norm": 1.3482339584478593, + "learning_rate": 8.42675595555753e-08, + "loss": 0.26125872135162354, + "step": 3890 + }, + { + "epoch": 1.9227727665884098, + "grad_norm": 1.4420298418522868, + "learning_rate": 8.321239137019433e-08, + "loss": 0.26559343934059143, + "step": 3891 + }, + { + 
"epoch": 1.923267020882244, + "grad_norm": 1.188066329993037, + "learning_rate": 8.216384349052809e-08, + "loss": 0.2033136785030365, + "step": 3892 + }, + { + "epoch": 1.9237612751760782, + "grad_norm": 1.975689815636208, + "learning_rate": 8.112191661656999e-08, + "loss": 0.2750868797302246, + "step": 3893 + }, + { + "epoch": 1.9242555294699124, + "grad_norm": 1.366292176712638, + "learning_rate": 8.008661144389807e-08, + "loss": 0.2082993984222412, + "step": 3894 + }, + { + "epoch": 1.9247497837637466, + "grad_norm": 1.4608755297303442, + "learning_rate": 7.905792866366501e-08, + "loss": 0.2495439350605011, + "step": 3895 + }, + { + "epoch": 1.9252440380575806, + "grad_norm": 1.4141233844295813, + "learning_rate": 7.803586896260707e-08, + "loss": 0.25609591603279114, + "step": 3896 + }, + { + "epoch": 1.9257382923514148, + "grad_norm": 1.5334004898395663, + "learning_rate": 7.702043302303397e-08, + "loss": 0.25372135639190674, + "step": 3897 + }, + { + "epoch": 1.926232546645249, + "grad_norm": 1.3368221554281705, + "learning_rate": 7.601162152283904e-08, + "loss": 0.21882784366607666, + "step": 3898 + }, + { + "epoch": 1.926726800939083, + "grad_norm": 1.5284992426615736, + "learning_rate": 7.500943513548797e-08, + "loss": 0.24513296782970428, + "step": 3899 + }, + { + "epoch": 1.9272210552329172, + "grad_norm": 1.3036631509681367, + "learning_rate": 7.401387453002673e-08, + "loss": 0.23508042097091675, + "step": 3900 + }, + { + "epoch": 1.9277153095267514, + "grad_norm": 1.2751462486235168, + "learning_rate": 7.30249403710792e-08, + "loss": 0.2288282811641693, + "step": 3901 + }, + { + "epoch": 1.9282095638205856, + "grad_norm": 1.4342484579443016, + "learning_rate": 7.204263331884175e-08, + "loss": 0.24606133997440338, + "step": 3902 + }, + { + "epoch": 1.9287038181144198, + "grad_norm": 1.3623815600739415, + "learning_rate": 7.10669540290887e-08, + "loss": 0.2710507810115814, + "step": 3903 + }, + { + "epoch": 1.929198072408254, + "grad_norm": 
1.3748292603956795, + "learning_rate": 7.009790315317122e-08, + "loss": 0.27333927154541016, + "step": 3904 + }, + { + "epoch": 1.9296923267020882, + "grad_norm": 1.3028025790213729, + "learning_rate": 6.913548133801074e-08, + "loss": 0.27518531680107117, + "step": 3905 + }, + { + "epoch": 1.9301865809959224, + "grad_norm": 1.3661226136758882, + "learning_rate": 6.817968922610884e-08, + "loss": 0.24289458990097046, + "step": 3906 + }, + { + "epoch": 1.9306808352897566, + "grad_norm": 1.3726485965253954, + "learning_rate": 6.723052745553848e-08, + "loss": 0.225175678730011, + "step": 3907 + }, + { + "epoch": 1.9311750895835909, + "grad_norm": 1.4678815751521954, + "learning_rate": 6.628799665994612e-08, + "loss": 0.2592085599899292, + "step": 3908 + }, + { + "epoch": 1.931669343877425, + "grad_norm": 1.5719300045981148, + "learning_rate": 6.535209746855064e-08, + "loss": 0.2649756968021393, + "step": 3909 + }, + { + "epoch": 1.9321635981712593, + "grad_norm": 1.3380899824561678, + "learning_rate": 6.442283050614673e-08, + "loss": 0.2318311631679535, + "step": 3910 + }, + { + "epoch": 1.9326578524650933, + "grad_norm": 1.6231265342953554, + "learning_rate": 6.350019639309923e-08, + "loss": 0.252924382686615, + "step": 3911 + }, + { + "epoch": 1.9331521067589275, + "grad_norm": 1.1670510769577984, + "learning_rate": 6.258419574534547e-08, + "loss": 0.1903652548789978, + "step": 3912 + }, + { + "epoch": 1.9336463610527617, + "grad_norm": 1.270678601269557, + "learning_rate": 6.167482917439404e-08, + "loss": 0.22795221209526062, + "step": 3913 + }, + { + "epoch": 1.9341406153465959, + "grad_norm": 1.5197318429157889, + "learning_rate": 6.077209728732492e-08, + "loss": 0.26521584391593933, + "step": 3914 + }, + { + "epoch": 1.9346348696404299, + "grad_norm": 1.272486350308544, + "learning_rate": 5.987600068679045e-08, + "loss": 0.22152049839496613, + "step": 3915 + }, + { + "epoch": 1.935129123934264, + "grad_norm": 1.2727416096160045, + "learning_rate": 
5.898653997100989e-08, + "loss": 0.22663083672523499, + "step": 3916 + }, + { + "epoch": 1.9356233782280983, + "grad_norm": 1.3553153320714941, + "learning_rate": 5.8103715733776047e-08, + "loss": 0.23720389604568481, + "step": 3917 + }, + { + "epoch": 1.9361176325219325, + "grad_norm": 1.4063431471110097, + "learning_rate": 5.722752856444858e-08, + "loss": 0.24053935706615448, + "step": 3918 + }, + { + "epoch": 1.9366118868157667, + "grad_norm": 1.3614412415474415, + "learning_rate": 5.635797904795848e-08, + "loss": 0.26565641164779663, + "step": 3919 + }, + { + "epoch": 1.9371061411096009, + "grad_norm": 1.4288462330405298, + "learning_rate": 5.5495067764804736e-08, + "loss": 0.27181264758110046, + "step": 3920 + }, + { + "epoch": 1.937600395403435, + "grad_norm": 1.3077579832623365, + "learning_rate": 5.46387952910532e-08, + "loss": 0.23340710997581482, + "step": 3921 + }, + { + "epoch": 1.9380946496972693, + "grad_norm": 1.2207580518535108, + "learning_rate": 5.378916219833996e-08, + "loss": 0.19458985328674316, + "step": 3922 + }, + { + "epoch": 1.9385889039911035, + "grad_norm": 1.4167428327318625, + "learning_rate": 5.2946169053869066e-08, + "loss": 0.22900202870368958, + "step": 3923 + }, + { + "epoch": 1.9390831582849377, + "grad_norm": 1.528940034628332, + "learning_rate": 5.210981642040924e-08, + "loss": 0.30710160732269287, + "step": 3924 + }, + { + "epoch": 1.939577412578772, + "grad_norm": 1.1725653667546314, + "learning_rate": 5.12801048562972e-08, + "loss": 0.1754809319972992, + "step": 3925 + }, + { + "epoch": 1.940071666872606, + "grad_norm": 1.4924045607844934, + "learning_rate": 5.045703491543763e-08, + "loss": 0.28787121176719666, + "step": 3926 + }, + { + "epoch": 1.94056592116644, + "grad_norm": 1.2741088738360473, + "learning_rate": 4.96406071472999e-08, + "loss": 0.2239963263273239, + "step": 3927 + }, + { + "epoch": 1.9410601754602743, + "grad_norm": 1.3932338575101701, + "learning_rate": 4.883082209692025e-08, + "loss": 
0.2121300995349884, + "step": 3928 + }, + { + "epoch": 1.9415544297541085, + "grad_norm": 1.4253326667240858, + "learning_rate": 4.802768030489735e-08, + "loss": 0.23445773124694824, + "step": 3929 + }, + { + "epoch": 1.9420486840479425, + "grad_norm": 1.3518230097115338, + "learning_rate": 4.7231182307400095e-08, + "loss": 0.2369021326303482, + "step": 3930 + }, + { + "epoch": 1.9425429383417767, + "grad_norm": 1.4790931523959723, + "learning_rate": 4.644132863615758e-08, + "loss": 0.2764047086238861, + "step": 3931 + }, + { + "epoch": 1.943037192635611, + "grad_norm": 1.3688921537083945, + "learning_rate": 4.565811981846468e-08, + "loss": 0.26021280884742737, + "step": 3932 + }, + { + "epoch": 1.9435314469294451, + "grad_norm": 1.4010339478163996, + "learning_rate": 4.488155637718095e-08, + "loss": 0.26012706756591797, + "step": 3933 + }, + { + "epoch": 1.9440257012232793, + "grad_norm": 1.4174814798438116, + "learning_rate": 4.4111638830729444e-08, + "loss": 0.22092604637145996, + "step": 3934 + }, + { + "epoch": 1.9445199555171135, + "grad_norm": 1.3803129729570953, + "learning_rate": 4.334836769309347e-08, + "loss": 0.24200648069381714, + "step": 3935 + }, + { + "epoch": 1.9450142098109477, + "grad_norm": 1.4185191605274636, + "learning_rate": 4.2591743473826554e-08, + "loss": 0.2545608580112457, + "step": 3936 + }, + { + "epoch": 1.945508464104782, + "grad_norm": 1.4140513212071641, + "learning_rate": 4.1841766678036854e-08, + "loss": 0.24908477067947388, + "step": 3937 + }, + { + "epoch": 1.9460027183986162, + "grad_norm": 1.3236421908105307, + "learning_rate": 4.109843780639833e-08, + "loss": 0.23568233847618103, + "step": 3938 + }, + { + "epoch": 1.9464969726924504, + "grad_norm": 1.4613354363975228, + "learning_rate": 4.0361757355147355e-08, + "loss": 0.2230791449546814, + "step": 3939 + }, + { + "epoch": 1.9469912269862846, + "grad_norm": 1.4124924138900457, + "learning_rate": 3.963172581608166e-08, + "loss": 0.2541523277759552, + "step": 3940 + }, + { + 
"epoch": 1.9474854812801188, + "grad_norm": 1.320376312149322, + "learning_rate": 3.8908343676559156e-08, + "loss": 0.2466837763786316, + "step": 3941 + }, + { + "epoch": 1.9479797355739528, + "grad_norm": 1.434845915637092, + "learning_rate": 3.819161141950134e-08, + "loss": 0.2700938880443573, + "step": 3942 + }, + { + "epoch": 1.948473989867787, + "grad_norm": 1.2847188951445323, + "learning_rate": 3.7481529523384355e-08, + "loss": 0.2353779673576355, + "step": 3943 + }, + { + "epoch": 1.9489682441616212, + "grad_norm": 1.4586624394757335, + "learning_rate": 3.677809846225344e-08, + "loss": 0.25708913803100586, + "step": 3944 + }, + { + "epoch": 1.9494624984554554, + "grad_norm": 1.3816118361393621, + "learning_rate": 3.6081318705705195e-08, + "loss": 0.26113903522491455, + "step": 3945 + }, + { + "epoch": 1.9499567527492894, + "grad_norm": 1.353452761923649, + "learning_rate": 3.539119071890307e-08, + "loss": 0.2561355531215668, + "step": 3946 + }, + { + "epoch": 1.9504510070431236, + "grad_norm": 1.3679281740557483, + "learning_rate": 3.470771496256409e-08, + "loss": 0.24893885850906372, + "step": 3947 + }, + { + "epoch": 1.9509452613369578, + "grad_norm": 1.3725754281016815, + "learning_rate": 3.403089189296771e-08, + "loss": 0.25399699807167053, + "step": 3948 + }, + { + "epoch": 1.951439515630792, + "grad_norm": 1.4355008801200986, + "learning_rate": 3.3360721961952505e-08, + "loss": 0.2820609509944916, + "step": 3949 + }, + { + "epoch": 1.9519337699246262, + "grad_norm": 1.222474026880474, + "learning_rate": 3.269720561691281e-08, + "loss": 0.22128066420555115, + "step": 3950 + }, + { + "epoch": 1.9524280242184604, + "grad_norm": 1.2322544942302993, + "learning_rate": 3.204034330080319e-08, + "loss": 0.2132534235715866, + "step": 3951 + }, + { + "epoch": 1.9529222785122946, + "grad_norm": 1.3290638165306805, + "learning_rate": 3.1390135452135095e-08, + "loss": 0.2308463454246521, + "step": 3952 + }, + { + "epoch": 1.9534165328061288, + "grad_norm": 
1.443895071093895, + "learning_rate": 3.074658250497908e-08, + "loss": 0.2756718397140503, + "step": 3953 + }, + { + "epoch": 1.953910787099963, + "grad_norm": 1.404895730578394, + "learning_rate": 3.010968488896149e-08, + "loss": 0.24619412422180176, + "step": 3954 + }, + { + "epoch": 1.9544050413937972, + "grad_norm": 1.3236097287021305, + "learning_rate": 2.9479443029265532e-08, + "loss": 0.2164454162120819, + "step": 3955 + }, + { + "epoch": 1.9548992956876314, + "grad_norm": 1.4498814795200483, + "learning_rate": 2.8855857346632432e-08, + "loss": 0.2778991460800171, + "step": 3956 + }, + { + "epoch": 1.9553935499814654, + "grad_norm": 1.3781229461817452, + "learning_rate": 2.8238928257359188e-08, + "loss": 0.22639301419258118, + "step": 3957 + }, + { + "epoch": 1.9558878042752996, + "grad_norm": 1.3946690868287814, + "learning_rate": 2.7628656173297463e-08, + "loss": 0.2367630898952484, + "step": 3958 + }, + { + "epoch": 1.9563820585691338, + "grad_norm": 1.233715623675162, + "learning_rate": 2.702504150185692e-08, + "loss": 0.2400333285331726, + "step": 3959 + }, + { + "epoch": 1.956876312862968, + "grad_norm": 1.5197718598123784, + "learning_rate": 2.6428084646001884e-08, + "loss": 0.2384340763092041, + "step": 3960 + }, + { + "epoch": 1.957370567156802, + "grad_norm": 1.2187445449938668, + "learning_rate": 2.5837786004253572e-08, + "loss": 0.20191673934459686, + "step": 3961 + }, + { + "epoch": 1.9578648214506362, + "grad_norm": 1.3530782559852856, + "learning_rate": 2.525414597068565e-08, + "loss": 0.24700434505939484, + "step": 3962 + }, + { + "epoch": 1.9583590757444704, + "grad_norm": 1.2586448244620927, + "learning_rate": 2.4677164934928665e-08, + "loss": 0.20032359659671783, + "step": 3963 + }, + { + "epoch": 1.9588533300383046, + "grad_norm": 1.5212375132060378, + "learning_rate": 2.4106843282165615e-08, + "loss": 0.280154287815094, + "step": 3964 + }, + { + "epoch": 1.9593475843321388, + "grad_norm": 1.46590896106962, + "learning_rate": 
2.3543181393135274e-08, + "loss": 0.25518566370010376, + "step": 3965 + }, + { + "epoch": 1.959841838625973, + "grad_norm": 1.2792917112791735, + "learning_rate": 2.298617964413108e-08, + "loss": 0.2246837019920349, + "step": 3966 + }, + { + "epoch": 1.9603360929198073, + "grad_norm": 1.3954375167289552, + "learning_rate": 2.2435838407000034e-08, + "loss": 0.23355916142463684, + "step": 3967 + }, + { + "epoch": 1.9608303472136415, + "grad_norm": 1.3615561015896285, + "learning_rate": 2.1892158049140467e-08, + "loss": 0.2449415922164917, + "step": 3968 + }, + { + "epoch": 1.9613246015074757, + "grad_norm": 1.279518283780108, + "learning_rate": 2.1355138933507602e-08, + "loss": 0.2269652783870697, + "step": 3969 + }, + { + "epoch": 1.9618188558013099, + "grad_norm": 1.4090731883758925, + "learning_rate": 2.0824781418605776e-08, + "loss": 0.26923638582229614, + "step": 3970 + }, + { + "epoch": 1.962313110095144, + "grad_norm": 1.3838329777907195, + "learning_rate": 2.0301085858493996e-08, + "loss": 0.2631189823150635, + "step": 3971 + }, + { + "epoch": 1.9628073643889783, + "grad_norm": 1.272147209216066, + "learning_rate": 1.978405260278593e-08, + "loss": 0.23281526565551758, + "step": 3972 + }, + { + "epoch": 1.9633016186828123, + "grad_norm": 1.4504818525258278, + "learning_rate": 1.9273681996644365e-08, + "loss": 0.26399385929107666, + "step": 3973 + }, + { + "epoch": 1.9637958729766465, + "grad_norm": 1.42867652212037, + "learning_rate": 1.876997438078454e-08, + "loss": 0.2641673684120178, + "step": 3974 + }, + { + "epoch": 1.9642901272704807, + "grad_norm": 1.2944638856965318, + "learning_rate": 1.8272930091476347e-08, + "loss": 0.22440402209758759, + "step": 3975 + }, + { + "epoch": 1.9647843815643147, + "grad_norm": 1.3585170311291963, + "learning_rate": 1.778254946053881e-08, + "loss": 0.2552195191383362, + "step": 3976 + }, + { + "epoch": 1.9652786358581489, + "grad_norm": 1.3475063805104281, + "learning_rate": 1.729883281534117e-08, + "loss": 
0.24455100297927856, + "step": 3977 + }, + { + "epoch": 1.965772890151983, + "grad_norm": 1.4925946223112605, + "learning_rate": 1.6821780478808448e-08, + "loss": 0.2324603945016861, + "step": 3978 + }, + { + "epoch": 1.9662671444458173, + "grad_norm": 1.295320797137711, + "learning_rate": 1.6351392769412556e-08, + "loss": 0.25488242506980896, + "step": 3979 + }, + { + "epoch": 1.9667613987396515, + "grad_norm": 1.2382372998222446, + "learning_rate": 1.5887670001177856e-08, + "loss": 0.23511120676994324, + "step": 3980 + }, + { + "epoch": 1.9672556530334857, + "grad_norm": 1.474081111410746, + "learning_rate": 1.5430612483680052e-08, + "loss": 0.2683457136154175, + "step": 3981 + }, + { + "epoch": 1.96774990732732, + "grad_norm": 1.3558148882952648, + "learning_rate": 1.4980220522041734e-08, + "loss": 0.26627787947654724, + "step": 3982 + }, + { + "epoch": 1.9682441616211541, + "grad_norm": 1.3779286197554192, + "learning_rate": 1.4536494416940162e-08, + "loss": 0.22931841015815735, + "step": 3983 + }, + { + "epoch": 1.9687384159149883, + "grad_norm": 1.3860885624616435, + "learning_rate": 1.4099434464600603e-08, + "loss": 0.22918352484703064, + "step": 3984 + }, + { + "epoch": 1.9692326702088225, + "grad_norm": 1.498852903518302, + "learning_rate": 1.3669040956797442e-08, + "loss": 0.2542854845523834, + "step": 3985 + }, + { + "epoch": 1.9697269245026567, + "grad_norm": 1.278204707841908, + "learning_rate": 1.3245314180854175e-08, + "loss": 0.21581681072711945, + "step": 3986 + }, + { + "epoch": 1.970221178796491, + "grad_norm": 1.4943144749429917, + "learning_rate": 1.2828254419646746e-08, + "loss": 0.2708613872528076, + "step": 3987 + }, + { + "epoch": 1.970715433090325, + "grad_norm": 1.3377756042264306, + "learning_rate": 1.2417861951597998e-08, + "loss": 0.25348716974258423, + "step": 3988 + }, + { + "epoch": 1.9712096873841591, + "grad_norm": 1.396109244896111, + "learning_rate": 1.2014137050677665e-08, + "loss": 0.24585089087486267, + "step": 3989 + }, + { 
+ "epoch": 1.9717039416779933, + "grad_norm": 1.3367696007925745, + "learning_rate": 1.1617079986410152e-08, + "loss": 0.26362112164497375, + "step": 3990 + }, + { + "epoch": 1.9721981959718276, + "grad_norm": 1.815729582105598, + "learning_rate": 1.1226691023862312e-08, + "loss": 0.23288659751415253, + "step": 3991 + }, + { + "epoch": 1.9726924502656615, + "grad_norm": 1.2305688182670602, + "learning_rate": 1.0842970423654563e-08, + "loss": 0.21604478359222412, + "step": 3992 + }, + { + "epoch": 1.9731867045594957, + "grad_norm": 1.353706093653017, + "learning_rate": 1.0465918441950885e-08, + "loss": 0.21149985492229462, + "step": 3993 + }, + { + "epoch": 1.97368095885333, + "grad_norm": 1.40480632228099, + "learning_rate": 1.0095535330467698e-08, + "loss": 0.26392504572868347, + "step": 3994 + }, + { + "epoch": 1.9741752131471642, + "grad_norm": 1.246030064073758, + "learning_rate": 9.731821336466107e-09, + "loss": 0.22993823885917664, + "step": 3995 + }, + { + "epoch": 1.9746694674409984, + "grad_norm": 1.247780565740116, + "learning_rate": 9.374776702757438e-09, + "loss": 0.2207789570093155, + "step": 3996 + }, + { + "epoch": 1.9751637217348326, + "grad_norm": 1.3761642168404886, + "learning_rate": 9.024401667698802e-09, + "loss": 0.27149268984794617, + "step": 3997 + }, + { + "epoch": 1.9756579760286668, + "grad_norm": 1.4326020240148696, + "learning_rate": 8.680696465196425e-09, + "loss": 0.269406795501709, + "step": 3998 + }, + { + "epoch": 1.976152230322501, + "grad_norm": 1.4639077922370294, + "learning_rate": 8.343661324703434e-09, + "loss": 0.25354713201522827, + "step": 3999 + }, + { + "epoch": 1.9766464846163352, + "grad_norm": 1.3417205540337154, + "learning_rate": 8.013296471217624e-09, + "loss": 0.22957751154899597, + "step": 4000 + }, + { + "epoch": 1.9771407389101694, + "grad_norm": 1.4456474308400453, + "learning_rate": 7.68960212528702e-09, + "loss": 0.25355982780456543, + "step": 4001 + }, + { + "epoch": 1.9776349932040036, + "grad_norm": 
1.3644174652815564, + "learning_rate": 7.372578503005434e-09, + "loss": 0.2453315556049347, + "step": 4002 + }, + { + "epoch": 1.9781292474978376, + "grad_norm": 1.4020681279841292, + "learning_rate": 7.062225816013568e-09, + "loss": 0.2274405062198639, + "step": 4003 + }, + { + "epoch": 1.9786235017916718, + "grad_norm": 1.229857953320014, + "learning_rate": 6.7585442714979136e-09, + "loss": 0.22195965051651, + "step": 4004 + }, + { + "epoch": 1.979117756085506, + "grad_norm": 1.266569965950015, + "learning_rate": 6.461534072191855e-09, + "loss": 0.18664966523647308, + "step": 4005 + }, + { + "epoch": 1.9796120103793402, + "grad_norm": 1.3364190229696613, + "learning_rate": 6.171195416375675e-09, + "loss": 0.23385149240493774, + "step": 4006 + }, + { + "epoch": 1.9801062646731742, + "grad_norm": 1.4261084517387066, + "learning_rate": 5.887528497874328e-09, + "loss": 0.3141595721244812, + "step": 4007 + }, + { + "epoch": 1.9806005189670084, + "grad_norm": 1.1455773069888675, + "learning_rate": 5.610533506060778e-09, + "loss": 0.22392721474170685, + "step": 4008 + }, + { + "epoch": 1.9810947732608426, + "grad_norm": 1.3684844463884664, + "learning_rate": 5.34021062585377e-09, + "loss": 0.24451547861099243, + "step": 4009 + }, + { + "epoch": 1.9815890275546768, + "grad_norm": 1.4628440300051093, + "learning_rate": 5.076560037714506e-09, + "loss": 0.2916273772716522, + "step": 4010 + }, + { + "epoch": 1.982083281848511, + "grad_norm": 1.4921825705668792, + "learning_rate": 4.819581917654414e-09, + "loss": 0.25200486183166504, + "step": 4011 + }, + { + "epoch": 1.9825775361423452, + "grad_norm": 1.3319991237500093, + "learning_rate": 4.569276437227377e-09, + "loss": 0.24660873413085938, + "step": 4012 + }, + { + "epoch": 1.9830717904361794, + "grad_norm": 1.286303231792404, + "learning_rate": 4.325643763534171e-09, + "loss": 0.22276514768600464, + "step": 4013 + }, + { + "epoch": 1.9835660447300136, + "grad_norm": 1.556433924523847, + "learning_rate": 
4.088684059220249e-09, + "loss": 0.28938305377960205, + "step": 4014 + }, + { + "epoch": 1.9840602990238478, + "grad_norm": 1.3733276400200998, + "learning_rate": 3.85839748247685e-09, + "loss": 0.24640555679798126, + "step": 4015 + }, + { + "epoch": 1.984554553317682, + "grad_norm": 1.3418256960556196, + "learning_rate": 3.6347841870398858e-09, + "loss": 0.24476927518844604, + "step": 4016 + }, + { + "epoch": 1.9850488076115163, + "grad_norm": 1.4034327960934876, + "learning_rate": 3.417844322189945e-09, + "loss": 0.22534328699111938, + "step": 4017 + }, + { + "epoch": 1.9855430619053505, + "grad_norm": 1.4375031314885967, + "learning_rate": 3.2075780327534e-09, + "loss": 0.25029847025871277, + "step": 4018 + }, + { + "epoch": 1.9860373161991844, + "grad_norm": 1.376115670772505, + "learning_rate": 3.0039854591012994e-09, + "loss": 0.25584423542022705, + "step": 4019 + }, + { + "epoch": 1.9865315704930187, + "grad_norm": 1.3971629958782856, + "learning_rate": 2.8070667371493663e-09, + "loss": 0.26148709654808044, + "step": 4020 + }, + { + "epoch": 1.9870258247868529, + "grad_norm": 1.3375096203856904, + "learning_rate": 2.6168219983557786e-09, + "loss": 0.2510269284248352, + "step": 4021 + }, + { + "epoch": 1.987520079080687, + "grad_norm": 1.2826838094230344, + "learning_rate": 2.433251369727829e-09, + "loss": 0.24044418334960938, + "step": 4022 + }, + { + "epoch": 1.988014333374521, + "grad_norm": 1.3500084280255722, + "learning_rate": 2.256354973813046e-09, + "loss": 0.25793880224227905, + "step": 4023 + }, + { + "epoch": 1.9885085876683553, + "grad_norm": 1.4052663822447213, + "learning_rate": 2.086132928705853e-09, + "loss": 0.23637095093727112, + "step": 4024 + }, + { + "epoch": 1.9890028419621895, + "grad_norm": 1.4859565437270463, + "learning_rate": 1.9225853480431267e-09, + "loss": 0.29570624232292175, + "step": 4025 + }, + { + "epoch": 1.9894970962560237, + "grad_norm": 1.423587654508064, + "learning_rate": 1.7657123410075306e-09, + "loss": 
0.2697899341583252, + "step": 4026 + }, + { + "epoch": 1.9899913505498579, + "grad_norm": 1.6022210661391376, + "learning_rate": 1.615514012324182e-09, + "loss": 0.28011834621429443, + "step": 4027 + }, + { + "epoch": 1.990485604843692, + "grad_norm": 1.4192356954254337, + "learning_rate": 1.4719904622650939e-09, + "loss": 0.2770778238773346, + "step": 4028 + }, + { + "epoch": 1.9909798591375263, + "grad_norm": 1.3392534385035628, + "learning_rate": 1.335141786642513e-09, + "loss": 0.2662060558795929, + "step": 4029 + }, + { + "epoch": 1.9914741134313605, + "grad_norm": 1.2456909926822828, + "learning_rate": 1.2049680768166928e-09, + "loss": 0.21188628673553467, + "step": 4030 + }, + { + "epoch": 1.9919683677251947, + "grad_norm": 1.4172757737685897, + "learning_rate": 1.0814694196892294e-09, + "loss": 0.2749127745628357, + "step": 4031 + }, + { + "epoch": 1.992462622019029, + "grad_norm": 1.378123997738594, + "learning_rate": 9.64645897704175e-10, + "loss": 0.2271629124879837, + "step": 4032 + }, + { + "epoch": 1.9929568763128631, + "grad_norm": 1.3952087957146098, + "learning_rate": 8.544975888535867e-10, + "loss": 0.2704155743122101, + "step": 4033 + }, + { + "epoch": 1.993451130606697, + "grad_norm": 1.2966104091933182, + "learning_rate": 7.51024566670866e-10, + "loss": 0.23073506355285645, + "step": 4034 + }, + { + "epoch": 1.9939453849005313, + "grad_norm": 1.3325329668608163, + "learning_rate": 6.542269002307589e-10, + "loss": 0.21597059071063995, + "step": 4035 + }, + { + "epoch": 1.9944396391943655, + "grad_norm": 1.3812576049514713, + "learning_rate": 5.641046541560169e-10, + "loss": 0.21368899941444397, + "step": 4036 + }, + { + "epoch": 1.9949338934881997, + "grad_norm": 1.4586669387690583, + "learning_rate": 4.806578886107361e-10, + "loss": 0.2594050168991089, + "step": 4037 + }, + { + "epoch": 1.9954281477820337, + "grad_norm": 1.3017520645594045, + "learning_rate": 4.03886659302577e-10, + "loss": 0.21630799770355225, + "step": 4038 + }, + { + 
"epoch": 1.995922402075868, + "grad_norm": 1.3597057268680945, + "learning_rate": 3.337910174827652e-10, + "loss": 0.25168266892433167, + "step": 4039 + }, + { + "epoch": 1.9964166563697021, + "grad_norm": 1.7213418381035832, + "learning_rate": 2.70371009946091e-10, + "loss": 0.27963966131210327, + "step": 4040 + }, + { + "epoch": 1.9969109106635363, + "grad_norm": 1.575376951155568, + "learning_rate": 2.1362667903090938e-10, + "loss": 0.2196345329284668, + "step": 4041 + }, + { + "epoch": 1.9974051649573705, + "grad_norm": 1.3640521467532305, + "learning_rate": 1.6355806262025043e-10, + "loss": 0.22677919268608093, + "step": 4042 + }, + { + "epoch": 1.9978994192512047, + "grad_norm": 1.3667805176029666, + "learning_rate": 1.2016519413626804e-10, + "loss": 0.23044565320014954, + "step": 4043 + }, + { + "epoch": 1.998393673545039, + "grad_norm": 1.4131604362123111, + "learning_rate": 8.344810255023206e-11, + "loss": 0.2739316523075104, + "step": 4044 + }, + { + "epoch": 1.9988879278388731, + "grad_norm": 1.4085870126547175, + "learning_rate": 5.3406812372536196e-11, + "loss": 0.2610301673412323, + "step": 4045 + }, + { + "epoch": 1.9993821821327074, + "grad_norm": 1.2504694698952337, + "learning_rate": 3.004134365824918e-11, + "loss": 0.23652175068855286, + "step": 4046 + }, + { + "epoch": 1.9998764364265416, + "grad_norm": 1.2951296197807791, + "learning_rate": 1.3351712006004492e-11, + "loss": 0.23983967304229736, + "step": 4047 + }, + { + "epoch": 2.0, + "grad_norm": 2.5975889779248975, + "learning_rate": 3.337928559110637e-12, + "loss": 0.2640778720378876, + "step": 4048 + } + ], + "logging_steps": 1, + "max_steps": 4048, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3066106945339392.0, + 
"train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4048/training_args.bin b/checkpoint-4048/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dfe3e09693106b888d9a74120f900fc466890d4c --- /dev/null +++ b/checkpoint-4048/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0dfb10ba35de856be3ab9b2b044348b5752efc43fb83f0d6e71a782894a3001 +size 6968 diff --git a/checkpoint-4048/zero_to_fp32.py b/checkpoint-4048/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-4048/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": 
"Qwen2VLVideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..094b5f407da3f532ec6bb52f7e804534ce26d83d --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 3066106945339392.0, + "train_loss": 0.3191354194832708, + "train_runtime": 52842.7617, + "train_samples_per_second": 1.225, + "train_steps_per_second": 0.077 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6f086b8972f05427e8b28454411bbffb13077e26 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,4049 @@ +{"current_steps": 1, 
"total_steps": 4048, "loss": 1.1816105842590332, "lr": 0.0, "epoch": 0.0004942542938341777, "percentage": 0.02, "elapsed_time": "0:00:18", "remaining_time": "21:13:06"} +{"current_steps": 2, "total_steps": 4048, "loss": 1.1496102809906006, "lr": 9.852216748768474e-08, "epoch": 0.0009885085876683553, "percentage": 0.05, "elapsed_time": "0:00:32", "remaining_time": "18:16:50"} +{"current_steps": 3, "total_steps": 4048, "loss": 1.1515967845916748, "lr": 1.9704433497536947e-07, "epoch": 0.001482762881502533, "percentage": 0.07, "elapsed_time": "0:00:45", "remaining_time": "17:00:05"} +{"current_steps": 4, "total_steps": 4048, "loss": 1.1795943975448608, "lr": 2.955665024630542e-07, "epoch": 0.0019770171753367106, "percentage": 0.1, "elapsed_time": "0:00:59", "remaining_time": "16:36:59"} +{"current_steps": 5, "total_steps": 4048, "loss": 1.2289564609527588, "lr": 3.9408866995073894e-07, "epoch": 0.0024712714691708885, "percentage": 0.12, "elapsed_time": "0:01:11", "remaining_time": "16:08:55"} +{"current_steps": 6, "total_steps": 4048, "loss": 1.179269790649414, "lr": 4.926108374384237e-07, "epoch": 0.002965525763005066, "percentage": 0.15, "elapsed_time": "0:01:26", "remaining_time": "16:05:42"} +{"current_steps": 7, "total_steps": 4048, "loss": 1.199608564376831, "lr": 5.911330049261084e-07, "epoch": 0.003459780056839244, "percentage": 0.17, "elapsed_time": "0:01:39", "remaining_time": "15:53:59"} +{"current_steps": 8, "total_steps": 4048, "loss": 1.1643707752227783, "lr": 6.896551724137931e-07, "epoch": 0.003954034350673421, "percentage": 0.2, "elapsed_time": "0:01:52", "remaining_time": "15:44:33"} +{"current_steps": 9, "total_steps": 4048, "loss": 1.1264240741729736, "lr": 7.881773399014779e-07, "epoch": 0.004448288644507599, "percentage": 0.22, "elapsed_time": "0:02:06", "remaining_time": "15:44:05"} +{"current_steps": 10, "total_steps": 4048, "loss": 1.1717555522918701, "lr": 8.866995073891626e-07, "epoch": 0.004942542938341777, "percentage": 0.25, 
"elapsed_time": "0:02:19", "remaining_time": "15:39:16"} +{"current_steps": 11, "total_steps": 4048, "loss": 1.1856712102890015, "lr": 9.852216748768474e-07, "epoch": 0.005436797232175955, "percentage": 0.27, "elapsed_time": "0:02:33", "remaining_time": "15:40:14"} +{"current_steps": 12, "total_steps": 4048, "loss": 1.1258785724639893, "lr": 1.0837438423645322e-06, "epoch": 0.005931051526010132, "percentage": 0.3, "elapsed_time": "0:02:47", "remaining_time": "15:37:54"} +{"current_steps": 13, "total_steps": 4048, "loss": 1.1333656311035156, "lr": 1.1822660098522167e-06, "epoch": 0.00642530581984431, "percentage": 0.32, "elapsed_time": "0:03:00", "remaining_time": "15:36:10"} +{"current_steps": 14, "total_steps": 4048, "loss": 1.2281363010406494, "lr": 1.2807881773399017e-06, "epoch": 0.006919560113678488, "percentage": 0.35, "elapsed_time": "0:03:13", "remaining_time": "15:30:44"} +{"current_steps": 15, "total_steps": 4048, "loss": 1.1910676956176758, "lr": 1.3793103448275862e-06, "epoch": 0.0074138144075126654, "percentage": 0.37, "elapsed_time": "0:03:27", "remaining_time": "15:27:44"} +{"current_steps": 16, "total_steps": 4048, "loss": 1.2124552726745605, "lr": 1.4778325123152712e-06, "epoch": 0.007908068701346842, "percentage": 0.4, "elapsed_time": "0:03:39", "remaining_time": "15:22:55"} +{"current_steps": 17, "total_steps": 4048, "loss": 1.1993463039398193, "lr": 1.5763546798029558e-06, "epoch": 0.008402322995181021, "percentage": 0.42, "elapsed_time": "0:03:52", "remaining_time": "15:19:44"} +{"current_steps": 18, "total_steps": 4048, "loss": 1.1245683431625366, "lr": 1.6748768472906405e-06, "epoch": 0.008896577289015198, "percentage": 0.44, "elapsed_time": "0:04:05", "remaining_time": "15:15:30"} +{"current_steps": 19, "total_steps": 4048, "loss": 1.1838568449020386, "lr": 1.7733990147783253e-06, "epoch": 0.009390831582849375, "percentage": 0.47, "elapsed_time": "0:04:18", "remaining_time": "15:14:33"} +{"current_steps": 20, "total_steps": 4048, "loss": 
1.081169843673706, "lr": 1.8719211822660098e-06, "epoch": 0.009885085876683554, "percentage": 0.49, "elapsed_time": "0:04:31", "remaining_time": "15:12:22"} +{"current_steps": 21, "total_steps": 4048, "loss": 1.1506569385528564, "lr": 1.970443349753695e-06, "epoch": 0.010379340170517731, "percentage": 0.52, "elapsed_time": "0:04:44", "remaining_time": "15:10:32"} +{"current_steps": 22, "total_steps": 4048, "loss": 1.0841327905654907, "lr": 2.0689655172413796e-06, "epoch": 0.01087359446435191, "percentage": 0.54, "elapsed_time": "0:04:57", "remaining_time": "15:08:08"} +{"current_steps": 23, "total_steps": 4048, "loss": 1.1335525512695312, "lr": 2.1674876847290643e-06, "epoch": 0.011367848758186087, "percentage": 0.57, "elapsed_time": "0:05:10", "remaining_time": "15:06:11"} +{"current_steps": 24, "total_steps": 4048, "loss": 1.035188913345337, "lr": 2.266009852216749e-06, "epoch": 0.011862103052020264, "percentage": 0.59, "elapsed_time": "0:05:23", "remaining_time": "15:03:54"} +{"current_steps": 25, "total_steps": 4048, "loss": 1.0640877485275269, "lr": 2.3645320197044334e-06, "epoch": 0.012356357345854442, "percentage": 0.62, "elapsed_time": "0:05:35", "remaining_time": "15:00:46"} +{"current_steps": 26, "total_steps": 4048, "loss": 1.0479273796081543, "lr": 2.4630541871921186e-06, "epoch": 0.01285061163968862, "percentage": 0.64, "elapsed_time": "0:05:48", "remaining_time": "14:59:08"} +{"current_steps": 27, "total_steps": 4048, "loss": 1.0522505044937134, "lr": 2.5615763546798034e-06, "epoch": 0.013344865933522798, "percentage": 0.67, "elapsed_time": "0:06:01", "remaining_time": "14:58:07"} +{"current_steps": 28, "total_steps": 4048, "loss": 1.080836296081543, "lr": 2.660098522167488e-06, "epoch": 0.013839120227356975, "percentage": 0.69, "elapsed_time": "0:06:14", "remaining_time": "14:57:05"} +{"current_steps": 29, "total_steps": 4048, "loss": 0.9712544679641724, "lr": 2.7586206896551725e-06, "epoch": 0.014333374521191152, "percentage": 0.72, "elapsed_time": 
"0:06:28", "remaining_time": "14:57:25"} +{"current_steps": 30, "total_steps": 4048, "loss": 1.0469061136245728, "lr": 2.8571428571428573e-06, "epoch": 0.014827628815025331, "percentage": 0.74, "elapsed_time": "0:06:42", "remaining_time": "14:58:30"} +{"current_steps": 31, "total_steps": 4048, "loss": 0.9911116361618042, "lr": 2.9556650246305424e-06, "epoch": 0.015321883108859508, "percentage": 0.77, "elapsed_time": "0:06:55", "remaining_time": "14:58:18"} +{"current_steps": 32, "total_steps": 4048, "loss": 0.9552959203720093, "lr": 3.054187192118227e-06, "epoch": 0.015816137402693685, "percentage": 0.79, "elapsed_time": "0:07:10", "remaining_time": "14:59:35"} +{"current_steps": 33, "total_steps": 4048, "loss": 0.957429051399231, "lr": 3.1527093596059115e-06, "epoch": 0.016310391696527864, "percentage": 0.82, "elapsed_time": "0:07:23", "remaining_time": "14:58:42"} +{"current_steps": 34, "total_steps": 4048, "loss": 1.0180628299713135, "lr": 3.2512315270935963e-06, "epoch": 0.016804645990362042, "percentage": 0.84, "elapsed_time": "0:07:37", "remaining_time": "14:59:18"} +{"current_steps": 35, "total_steps": 4048, "loss": 0.9064415097236633, "lr": 3.349753694581281e-06, "epoch": 0.017298900284196218, "percentage": 0.86, "elapsed_time": "0:07:50", "remaining_time": "14:58:59"} +{"current_steps": 36, "total_steps": 4048, "loss": 0.9718184471130371, "lr": 3.448275862068966e-06, "epoch": 0.017793154578030396, "percentage": 0.89, "elapsed_time": "0:08:04", "remaining_time": "14:59:33"} +{"current_steps": 37, "total_steps": 4048, "loss": 0.8831444978713989, "lr": 3.5467980295566506e-06, "epoch": 0.018287408871864575, "percentage": 0.91, "elapsed_time": "0:08:17", "remaining_time": "14:59:28"} +{"current_steps": 38, "total_steps": 4048, "loss": 0.9167139530181885, "lr": 3.6453201970443354e-06, "epoch": 0.01878166316569875, "percentage": 0.94, "elapsed_time": "0:08:31", "remaining_time": "15:00:07"} +{"current_steps": 39, "total_steps": 4048, "loss": 0.9322037696838379, 
"lr": 3.7438423645320197e-06, "epoch": 0.01927591745953293, "percentage": 0.96, "elapsed_time": "0:08:45", "remaining_time": "14:59:47"} +{"current_steps": 40, "total_steps": 4048, "loss": 0.9189817905426025, "lr": 3.842364532019705e-06, "epoch": 0.019770171753367108, "percentage": 0.99, "elapsed_time": "0:08:59", "remaining_time": "15:00:09"} +{"current_steps": 41, "total_steps": 4048, "loss": 0.8480448126792908, "lr": 3.94088669950739e-06, "epoch": 0.020264426047201287, "percentage": 1.01, "elapsed_time": "0:09:11", "remaining_time": "14:59:01"} +{"current_steps": 42, "total_steps": 4048, "loss": 0.8907301425933838, "lr": 4.039408866995074e-06, "epoch": 0.020758680341035462, "percentage": 1.04, "elapsed_time": "0:09:25", "remaining_time": "14:58:41"} +{"current_steps": 43, "total_steps": 4048, "loss": 0.8674390316009521, "lr": 4.137931034482759e-06, "epoch": 0.02125293463486964, "percentage": 1.06, "elapsed_time": "0:09:39", "remaining_time": "14:59:10"} +{"current_steps": 44, "total_steps": 4048, "loss": 0.8674882054328918, "lr": 4.236453201970444e-06, "epoch": 0.02174718892870382, "percentage": 1.09, "elapsed_time": "0:09:53", "remaining_time": "14:59:27"} +{"current_steps": 45, "total_steps": 4048, "loss": 0.8542560338973999, "lr": 4.334975369458129e-06, "epoch": 0.022241443222537995, "percentage": 1.11, "elapsed_time": "0:10:06", "remaining_time": "14:59:44"} +{"current_steps": 46, "total_steps": 4048, "loss": 0.772778332233429, "lr": 4.4334975369458135e-06, "epoch": 0.022735697516372173, "percentage": 1.14, "elapsed_time": "0:10:20", "remaining_time": "14:59:25"} +{"current_steps": 47, "total_steps": 4048, "loss": 0.7481152415275574, "lr": 4.532019704433498e-06, "epoch": 0.023229951810206352, "percentage": 1.16, "elapsed_time": "0:10:34", "remaining_time": "15:00:18"} +{"current_steps": 48, "total_steps": 4048, "loss": 0.8373709917068481, "lr": 4.630541871921182e-06, "epoch": 0.023724206104040527, "percentage": 1.19, "elapsed_time": "0:10:47", 
"remaining_time": "14:59:41"} +{"current_steps": 49, "total_steps": 4048, "loss": 0.8163385391235352, "lr": 4.729064039408867e-06, "epoch": 0.024218460397874706, "percentage": 1.21, "elapsed_time": "0:11:01", "remaining_time": "15:00:14"} +{"current_steps": 50, "total_steps": 4048, "loss": 0.7444975972175598, "lr": 4.8275862068965525e-06, "epoch": 0.024712714691708885, "percentage": 1.24, "elapsed_time": "0:11:14", "remaining_time": "14:59:06"} +{"current_steps": 51, "total_steps": 4048, "loss": 0.7683243751525879, "lr": 4.926108374384237e-06, "epoch": 0.025206968985543064, "percentage": 1.26, "elapsed_time": "0:11:27", "remaining_time": "14:58:33"} +{"current_steps": 52, "total_steps": 4048, "loss": 0.806761622428894, "lr": 5.024630541871922e-06, "epoch": 0.02570122327937724, "percentage": 1.28, "elapsed_time": "0:11:40", "remaining_time": "14:57:44"} +{"current_steps": 53, "total_steps": 4048, "loss": 0.7312102913856506, "lr": 5.123152709359607e-06, "epoch": 0.026195477573211418, "percentage": 1.31, "elapsed_time": "0:11:54", "remaining_time": "14:57:26"} +{"current_steps": 54, "total_steps": 4048, "loss": 0.7351999282836914, "lr": 5.2216748768472915e-06, "epoch": 0.026689731867045596, "percentage": 1.33, "elapsed_time": "0:12:07", "remaining_time": "14:56:29"} +{"current_steps": 55, "total_steps": 4048, "loss": 0.7453763484954834, "lr": 5.320197044334976e-06, "epoch": 0.02718398616087977, "percentage": 1.36, "elapsed_time": "0:12:20", "remaining_time": "14:55:33"} +{"current_steps": 56, "total_steps": 4048, "loss": 0.7063292860984802, "lr": 5.41871921182266e-06, "epoch": 0.02767824045471395, "percentage": 1.38, "elapsed_time": "0:12:33", "remaining_time": "14:55:06"} +{"current_steps": 57, "total_steps": 4048, "loss": 0.7145994901657104, "lr": 5.517241379310345e-06, "epoch": 0.02817249474854813, "percentage": 1.41, "elapsed_time": "0:12:46", "remaining_time": "14:54:36"} +{"current_steps": 58, "total_steps": 4048, "loss": 0.687594473361969, "lr": 
5.61576354679803e-06, "epoch": 0.028666749042382304, "percentage": 1.43, "elapsed_time": "0:12:59", "remaining_time": "14:54:04"} +{"current_steps": 59, "total_steps": 4048, "loss": 0.6643895506858826, "lr": 5.7142857142857145e-06, "epoch": 0.029161003336216483, "percentage": 1.46, "elapsed_time": "0:13:12", "remaining_time": "14:53:29"} +{"current_steps": 60, "total_steps": 4048, "loss": 0.6781614422798157, "lr": 5.812807881773399e-06, "epoch": 0.029655257630050662, "percentage": 1.48, "elapsed_time": "0:13:26", "remaining_time": "14:53:17"} +{"current_steps": 61, "total_steps": 4048, "loss": 0.6209158301353455, "lr": 5.911330049261085e-06, "epoch": 0.030149511923884837, "percentage": 1.51, "elapsed_time": "0:13:39", "remaining_time": "14:52:49"} +{"current_steps": 62, "total_steps": 4048, "loss": 0.6424679756164551, "lr": 6.00985221674877e-06, "epoch": 0.030643766217719016, "percentage": 1.53, "elapsed_time": "0:13:53", "remaining_time": "14:52:35"} +{"current_steps": 63, "total_steps": 4048, "loss": 0.6745971441268921, "lr": 6.108374384236454e-06, "epoch": 0.031138020511553195, "percentage": 1.56, "elapsed_time": "0:14:06", "remaining_time": "14:52:04"} +{"current_steps": 64, "total_steps": 4048, "loss": 0.6520330905914307, "lr": 6.206896551724138e-06, "epoch": 0.03163227480538737, "percentage": 1.58, "elapsed_time": "0:14:20", "remaining_time": "14:52:38"} +{"current_steps": 65, "total_steps": 4048, "loss": 0.6790571212768555, "lr": 6.305418719211823e-06, "epoch": 0.03212652909922155, "percentage": 1.61, "elapsed_time": "0:14:33", "remaining_time": "14:52:13"} +{"current_steps": 66, "total_steps": 4048, "loss": 0.6491506099700928, "lr": 6.403940886699508e-06, "epoch": 0.03262078339305573, "percentage": 1.63, "elapsed_time": "0:14:47", "remaining_time": "14:52:25"} +{"current_steps": 67, "total_steps": 4048, "loss": 0.6347313523292542, "lr": 6.502463054187193e-06, "epoch": 0.033115037686889906, "percentage": 1.66, "elapsed_time": "0:15:01", "remaining_time": 
"14:52:24"} +{"current_steps": 68, "total_steps": 4048, "loss": 0.6785881519317627, "lr": 6.600985221674877e-06, "epoch": 0.033609291980724085, "percentage": 1.68, "elapsed_time": "0:15:15", "remaining_time": "14:52:52"} +{"current_steps": 69, "total_steps": 4048, "loss": 0.6470085978507996, "lr": 6.699507389162562e-06, "epoch": 0.034103546274558263, "percentage": 1.7, "elapsed_time": "0:15:28", "remaining_time": "14:52:30"} +{"current_steps": 70, "total_steps": 4048, "loss": 0.6205961108207703, "lr": 6.798029556650246e-06, "epoch": 0.034597800568392435, "percentage": 1.73, "elapsed_time": "0:15:42", "remaining_time": "14:52:35"} +{"current_steps": 71, "total_steps": 4048, "loss": 0.6621580123901367, "lr": 6.896551724137932e-06, "epoch": 0.035092054862226614, "percentage": 1.75, "elapsed_time": "0:15:55", "remaining_time": "14:52:13"} +{"current_steps": 72, "total_steps": 4048, "loss": 0.6363088488578796, "lr": 6.995073891625616e-06, "epoch": 0.03558630915606079, "percentage": 1.78, "elapsed_time": "0:16:09", "remaining_time": "14:52:29"} +{"current_steps": 73, "total_steps": 4048, "loss": 0.6073004007339478, "lr": 7.093596059113301e-06, "epoch": 0.03608056344989497, "percentage": 1.8, "elapsed_time": "0:16:23", "remaining_time": "14:52:28"} +{"current_steps": 74, "total_steps": 4048, "loss": 0.6490880846977234, "lr": 7.192118226600986e-06, "epoch": 0.03657481774372915, "percentage": 1.83, "elapsed_time": "0:16:37", "remaining_time": "14:52:48"} +{"current_steps": 75, "total_steps": 4048, "loss": 0.6540624499320984, "lr": 7.290640394088671e-06, "epoch": 0.03706907203756333, "percentage": 1.85, "elapsed_time": "0:16:51", "remaining_time": "14:53:01"} +{"current_steps": 76, "total_steps": 4048, "loss": 0.6237976551055908, "lr": 7.3891625615763555e-06, "epoch": 0.0375633263313975, "percentage": 1.88, "elapsed_time": "0:17:04", "remaining_time": "14:52:37"} +{"current_steps": 77, "total_steps": 4048, "loss": 0.6121219992637634, "lr": 7.487684729064039e-06, "epoch": 
0.03805758062523168, "percentage": 1.9, "elapsed_time": "0:17:18", "remaining_time": "14:52:49"} +{"current_steps": 78, "total_steps": 4048, "loss": 0.5785888433456421, "lr": 7.586206896551724e-06, "epoch": 0.03855183491906586, "percentage": 1.93, "elapsed_time": "0:17:32", "remaining_time": "14:52:27"} +{"current_steps": 79, "total_steps": 4048, "loss": 0.6144810914993286, "lr": 7.68472906403941e-06, "epoch": 0.03904608921290004, "percentage": 1.95, "elapsed_time": "0:17:46", "remaining_time": "14:52:50"} +{"current_steps": 80, "total_steps": 4048, "loss": 0.6522500514984131, "lr": 7.783251231527095e-06, "epoch": 0.039540343506734216, "percentage": 1.98, "elapsed_time": "0:17:59", "remaining_time": "14:52:47"} +{"current_steps": 81, "total_steps": 4048, "loss": 0.6126501560211182, "lr": 7.88177339901478e-06, "epoch": 0.040034597800568394, "percentage": 2.0, "elapsed_time": "0:18:13", "remaining_time": "14:52:39"} +{"current_steps": 82, "total_steps": 4048, "loss": 0.573388934135437, "lr": 7.980295566502464e-06, "epoch": 0.04052885209440257, "percentage": 2.03, "elapsed_time": "0:18:27", "remaining_time": "14:52:23"} +{"current_steps": 83, "total_steps": 4048, "loss": 0.6462322473526001, "lr": 8.078817733990149e-06, "epoch": 0.041023106388236745, "percentage": 2.05, "elapsed_time": "0:18:40", "remaining_time": "14:51:52"} +{"current_steps": 84, "total_steps": 4048, "loss": 0.6542905569076538, "lr": 8.177339901477834e-06, "epoch": 0.041517360682070924, "percentage": 2.08, "elapsed_time": "0:18:53", "remaining_time": "14:51:21"} +{"current_steps": 85, "total_steps": 4048, "loss": 0.6539976000785828, "lr": 8.275862068965518e-06, "epoch": 0.0420116149759051, "percentage": 2.1, "elapsed_time": "0:19:06", "remaining_time": "14:50:50"} +{"current_steps": 86, "total_steps": 4048, "loss": 0.6303049325942993, "lr": 8.374384236453203e-06, "epoch": 0.04250586926973928, "percentage": 2.12, "elapsed_time": "0:19:19", "remaining_time": "14:50:36"} +{"current_steps": 87, 
"total_steps": 4048, "loss": 0.5727078318595886, "lr": 8.472906403940888e-06, "epoch": 0.04300012356357346, "percentage": 2.15, "elapsed_time": "0:19:32", "remaining_time": "14:49:59"} +{"current_steps": 88, "total_steps": 4048, "loss": 0.6204914450645447, "lr": 8.571428571428571e-06, "epoch": 0.04349437785740764, "percentage": 2.17, "elapsed_time": "0:19:46", "remaining_time": "14:49:53"} +{"current_steps": 89, "total_steps": 4048, "loss": 0.633359432220459, "lr": 8.669950738916257e-06, "epoch": 0.04398863215124181, "percentage": 2.2, "elapsed_time": "0:19:59", "remaining_time": "14:49:18"} +{"current_steps": 90, "total_steps": 4048, "loss": 0.5737719535827637, "lr": 8.768472906403942e-06, "epoch": 0.04448288644507599, "percentage": 2.22, "elapsed_time": "0:20:13", "remaining_time": "14:49:15"} +{"current_steps": 91, "total_steps": 4048, "loss": 0.6438707709312439, "lr": 8.866995073891627e-06, "epoch": 0.04497714073891017, "percentage": 2.25, "elapsed_time": "0:20:26", "remaining_time": "14:48:40"} +{"current_steps": 92, "total_steps": 4048, "loss": 0.6284823417663574, "lr": 8.965517241379312e-06, "epoch": 0.04547139503274435, "percentage": 2.27, "elapsed_time": "0:20:40", "remaining_time": "14:48:51"} +{"current_steps": 93, "total_steps": 4048, "loss": 0.6442058086395264, "lr": 9.064039408866996e-06, "epoch": 0.045965649326578525, "percentage": 2.3, "elapsed_time": "0:20:53", "remaining_time": "14:48:17"} +{"current_steps": 94, "total_steps": 4048, "loss": 0.5821751356124878, "lr": 9.162561576354681e-06, "epoch": 0.046459903620412704, "percentage": 2.32, "elapsed_time": "0:21:06", "remaining_time": "14:48:03"} +{"current_steps": 95, "total_steps": 4048, "loss": 0.546042263507843, "lr": 9.261083743842364e-06, "epoch": 0.04695415791424688, "percentage": 2.35, "elapsed_time": "0:21:20", "remaining_time": "14:47:48"} +{"current_steps": 96, "total_steps": 4048, "loss": 0.5743244886398315, "lr": 9.359605911330049e-06, "epoch": 0.047448412208081055, "percentage": 2.37, 
"elapsed_time": "0:21:33", "remaining_time": "14:47:19"} +{"current_steps": 97, "total_steps": 4048, "loss": 0.5775831341743469, "lr": 9.458128078817734e-06, "epoch": 0.04794266650191523, "percentage": 2.4, "elapsed_time": "0:21:47", "remaining_time": "14:47:25"} +{"current_steps": 98, "total_steps": 4048, "loss": 0.5632016658782959, "lr": 9.55665024630542e-06, "epoch": 0.04843692079574941, "percentage": 2.42, "elapsed_time": "0:22:00", "remaining_time": "14:46:53"} +{"current_steps": 99, "total_steps": 4048, "loss": 0.5817564129829407, "lr": 9.655172413793105e-06, "epoch": 0.04893117508958359, "percentage": 2.45, "elapsed_time": "0:22:13", "remaining_time": "14:46:24"} +{"current_steps": 100, "total_steps": 4048, "loss": 0.5692225098609924, "lr": 9.75369458128079e-06, "epoch": 0.04942542938341777, "percentage": 2.47, "elapsed_time": "0:22:26", "remaining_time": "14:45:42"} +{"current_steps": 101, "total_steps": 4048, "loss": 0.5239434242248535, "lr": 9.852216748768475e-06, "epoch": 0.04991968367725195, "percentage": 2.5, "elapsed_time": "0:22:43", "remaining_time": "14:48:16"} +{"current_steps": 102, "total_steps": 4048, "loss": 0.543138861656189, "lr": 9.95073891625616e-06, "epoch": 0.05041393797108613, "percentage": 2.52, "elapsed_time": "0:22:55", "remaining_time": "14:47:09"} +{"current_steps": 103, "total_steps": 4048, "loss": 0.5914052128791809, "lr": 1.0049261083743844e-05, "epoch": 0.0509081922649203, "percentage": 2.54, "elapsed_time": "0:23:08", "remaining_time": "14:46:15"} +{"current_steps": 104, "total_steps": 4048, "loss": 0.5394442081451416, "lr": 1.0147783251231529e-05, "epoch": 0.05140244655875448, "percentage": 2.57, "elapsed_time": "0:23:20", "remaining_time": "14:45:08"} +{"current_steps": 105, "total_steps": 4048, "loss": 0.6157902479171753, "lr": 1.0246305418719214e-05, "epoch": 0.051896700852588656, "percentage": 2.59, "elapsed_time": "0:23:32", "remaining_time": "14:44:17"} +{"current_steps": 106, "total_steps": 4048, "loss": 
0.5863415598869324, "lr": 1.0344827586206898e-05, "epoch": 0.052390955146422835, "percentage": 2.62, "elapsed_time": "0:23:45", "remaining_time": "14:43:21"} +{"current_steps": 107, "total_steps": 4048, "loss": 0.5783145427703857, "lr": 1.0443349753694583e-05, "epoch": 0.052885209440257014, "percentage": 2.64, "elapsed_time": "0:23:58", "remaining_time": "14:43:02"} +{"current_steps": 108, "total_steps": 4048, "loss": 0.5761469006538391, "lr": 1.0541871921182268e-05, "epoch": 0.05337946373409119, "percentage": 2.67, "elapsed_time": "0:24:11", "remaining_time": "14:42:27"} +{"current_steps": 109, "total_steps": 4048, "loss": 0.5931205749511719, "lr": 1.0640394088669953e-05, "epoch": 0.053873718027925364, "percentage": 2.69, "elapsed_time": "0:24:24", "remaining_time": "14:42:15"} +{"current_steps": 110, "total_steps": 4048, "loss": 0.5429986119270325, "lr": 1.0738916256157637e-05, "epoch": 0.05436797232175954, "percentage": 2.72, "elapsed_time": "0:24:37", "remaining_time": "14:41:44"} +{"current_steps": 111, "total_steps": 4048, "loss": 0.5154455304145813, "lr": 1.083743842364532e-05, "epoch": 0.05486222661559372, "percentage": 2.74, "elapsed_time": "0:24:51", "remaining_time": "14:41:32"} +{"current_steps": 112, "total_steps": 4048, "loss": 0.5465028285980225, "lr": 1.0935960591133005e-05, "epoch": 0.0553564809094279, "percentage": 2.77, "elapsed_time": "0:25:04", "remaining_time": "14:41:07"} +{"current_steps": 113, "total_steps": 4048, "loss": 0.5973349213600159, "lr": 1.103448275862069e-05, "epoch": 0.05585073520326208, "percentage": 2.79, "elapsed_time": "0:25:17", "remaining_time": "14:40:31"} +{"current_steps": 114, "total_steps": 4048, "loss": 0.6201578378677368, "lr": 1.1133004926108375e-05, "epoch": 0.05634498949709626, "percentage": 2.82, "elapsed_time": "0:25:30", "remaining_time": "14:40:13"} +{"current_steps": 115, "total_steps": 4048, "loss": 0.5090143084526062, "lr": 1.123152709359606e-05, "epoch": 0.05683924379093044, "percentage": 2.84, 
"elapsed_time": "0:25:43", "remaining_time": "14:39:39"} +{"current_steps": 116, "total_steps": 4048, "loss": 0.5275869369506836, "lr": 1.1330049261083744e-05, "epoch": 0.05733349808476461, "percentage": 2.87, "elapsed_time": "0:25:56", "remaining_time": "14:39:27"} +{"current_steps": 117, "total_steps": 4048, "loss": 0.571302056312561, "lr": 1.1428571428571429e-05, "epoch": 0.05782775237859879, "percentage": 2.89, "elapsed_time": "0:26:08", "remaining_time": "14:38:29"} +{"current_steps": 118, "total_steps": 4048, "loss": 0.5920293927192688, "lr": 1.1527093596059114e-05, "epoch": 0.058322006672432966, "percentage": 2.92, "elapsed_time": "0:26:21", "remaining_time": "14:37:47"} +{"current_steps": 119, "total_steps": 4048, "loss": 0.5877068042755127, "lr": 1.1625615763546799e-05, "epoch": 0.058816260966267145, "percentage": 2.94, "elapsed_time": "0:26:33", "remaining_time": "14:36:56"} +{"current_steps": 120, "total_steps": 4048, "loss": 0.6140042543411255, "lr": 1.1724137931034483e-05, "epoch": 0.059310515260101324, "percentage": 2.96, "elapsed_time": "0:26:46", "remaining_time": "14:36:13"} +{"current_steps": 121, "total_steps": 4048, "loss": 0.5642052292823792, "lr": 1.182266009852217e-05, "epoch": 0.0598047695539355, "percentage": 2.99, "elapsed_time": "0:26:58", "remaining_time": "14:35:14"} +{"current_steps": 122, "total_steps": 4048, "loss": 0.5535261034965515, "lr": 1.1921182266009855e-05, "epoch": 0.060299023847769674, "percentage": 3.01, "elapsed_time": "0:27:10", "remaining_time": "14:34:30"} +{"current_steps": 123, "total_steps": 4048, "loss": 0.5202849507331848, "lr": 1.201970443349754e-05, "epoch": 0.06079327814160385, "percentage": 3.04, "elapsed_time": "0:27:22", "remaining_time": "14:33:26"} +{"current_steps": 124, "total_steps": 4048, "loss": 0.5626791715621948, "lr": 1.2118226600985224e-05, "epoch": 0.06128753243543803, "percentage": 3.06, "elapsed_time": "0:27:34", "remaining_time": "14:32:33"} +{"current_steps": 125, "total_steps": 4048, "loss": 
0.5416101217269897, "lr": 1.2216748768472909e-05, "epoch": 0.06178178672927221, "percentage": 3.09, "elapsed_time": "0:27:46", "remaining_time": "14:31:53"} +{"current_steps": 126, "total_steps": 4048, "loss": 0.5683388710021973, "lr": 1.2315270935960592e-05, "epoch": 0.06227604102310639, "percentage": 3.11, "elapsed_time": "0:27:59", "remaining_time": "14:31:12"} +{"current_steps": 127, "total_steps": 4048, "loss": 0.564468264579773, "lr": 1.2413793103448277e-05, "epoch": 0.06277029531694056, "percentage": 3.14, "elapsed_time": "0:28:12", "remaining_time": "14:30:44"} +{"current_steps": 128, "total_steps": 4048, "loss": 0.5419844388961792, "lr": 1.2512315270935961e-05, "epoch": 0.06326454961077474, "percentage": 3.16, "elapsed_time": "0:28:24", "remaining_time": "14:30:15"} +{"current_steps": 129, "total_steps": 4048, "loss": 0.51283860206604, "lr": 1.2610837438423646e-05, "epoch": 0.06375880390460892, "percentage": 3.19, "elapsed_time": "0:28:38", "remaining_time": "14:29:52"} +{"current_steps": 130, "total_steps": 4048, "loss": 0.5807296633720398, "lr": 1.2709359605911331e-05, "epoch": 0.0642530581984431, "percentage": 3.21, "elapsed_time": "0:28:50", "remaining_time": "14:29:20"} +{"current_steps": 131, "total_steps": 4048, "loss": 0.5277815461158752, "lr": 1.2807881773399016e-05, "epoch": 0.06474731249227728, "percentage": 3.24, "elapsed_time": "0:29:03", "remaining_time": "14:29:01"} +{"current_steps": 132, "total_steps": 4048, "loss": 0.5044680833816528, "lr": 1.29064039408867e-05, "epoch": 0.06524156678611145, "percentage": 3.26, "elapsed_time": "0:29:16", "remaining_time": "14:28:33"} +{"current_steps": 133, "total_steps": 4048, "loss": 0.5412886738777161, "lr": 1.3004926108374385e-05, "epoch": 0.06573582107994563, "percentage": 3.29, "elapsed_time": "0:29:29", "remaining_time": "14:28:12"} +{"current_steps": 134, "total_steps": 4048, "loss": 0.5314532518386841, "lr": 1.310344827586207e-05, "epoch": 0.06623007537377981, "percentage": 3.31, "elapsed_time": 
"0:29:42", "remaining_time": "14:27:49"} +{"current_steps": 135, "total_steps": 4048, "loss": 0.5544138550758362, "lr": 1.3201970443349755e-05, "epoch": 0.06672432966761399, "percentage": 3.33, "elapsed_time": "0:29:55", "remaining_time": "14:27:17"} +{"current_steps": 136, "total_steps": 4048, "loss": 0.5745705366134644, "lr": 1.330049261083744e-05, "epoch": 0.06721858396144817, "percentage": 3.36, "elapsed_time": "0:30:08", "remaining_time": "14:27:03"} +{"current_steps": 137, "total_steps": 4048, "loss": 0.5361800789833069, "lr": 1.3399014778325124e-05, "epoch": 0.06771283825528235, "percentage": 3.38, "elapsed_time": "0:30:21", "remaining_time": "14:26:27"} +{"current_steps": 138, "total_steps": 4048, "loss": 0.5878221392631531, "lr": 1.3497536945812807e-05, "epoch": 0.06820709254911653, "percentage": 3.41, "elapsed_time": "0:30:33", "remaining_time": "14:25:50"} +{"current_steps": 139, "total_steps": 4048, "loss": 0.5952787399291992, "lr": 1.3596059113300492e-05, "epoch": 0.06870134684295069, "percentage": 3.43, "elapsed_time": "0:30:45", "remaining_time": "14:25:02"} +{"current_steps": 140, "total_steps": 4048, "loss": 0.5334340929985046, "lr": 1.369458128078818e-05, "epoch": 0.06919560113678487, "percentage": 3.46, "elapsed_time": "0:30:58", "remaining_time": "14:24:30"} +{"current_steps": 141, "total_steps": 4048, "loss": 0.5297533273696899, "lr": 1.3793103448275863e-05, "epoch": 0.06968985543061905, "percentage": 3.48, "elapsed_time": "0:31:10", "remaining_time": "14:23:44"} +{"current_steps": 142, "total_steps": 4048, "loss": 0.5388105511665344, "lr": 1.3891625615763548e-05, "epoch": 0.07018410972445323, "percentage": 3.51, "elapsed_time": "0:31:22", "remaining_time": "14:23:10"} +{"current_steps": 143, "total_steps": 4048, "loss": 0.484375536441803, "lr": 1.3990147783251233e-05, "epoch": 0.0706783640182874, "percentage": 3.53, "elapsed_time": "0:31:35", "remaining_time": "14:22:29"} +{"current_steps": 144, "total_steps": 4048, "loss": 0.5395358800888062, 
"lr": 1.4088669950738918e-05, "epoch": 0.07117261831212159, "percentage": 3.56, "elapsed_time": "0:31:47", "remaining_time": "14:22:02"} +{"current_steps": 145, "total_steps": 4048, "loss": 0.501459538936615, "lr": 1.4187192118226602e-05, "epoch": 0.07166687260595576, "percentage": 3.58, "elapsed_time": "0:32:00", "remaining_time": "14:21:24"} +{"current_steps": 146, "total_steps": 4048, "loss": 0.5390491485595703, "lr": 1.4285714285714287e-05, "epoch": 0.07216112689978994, "percentage": 3.61, "elapsed_time": "0:32:12", "remaining_time": "14:20:53"} +{"current_steps": 147, "total_steps": 4048, "loss": 0.505649745464325, "lr": 1.4384236453201972e-05, "epoch": 0.07265538119362412, "percentage": 3.63, "elapsed_time": "0:32:24", "remaining_time": "14:20:14"} +{"current_steps": 148, "total_steps": 4048, "loss": 0.5155121684074402, "lr": 1.4482758620689657e-05, "epoch": 0.0731496354874583, "percentage": 3.66, "elapsed_time": "0:32:37", "remaining_time": "14:19:43"} +{"current_steps": 149, "total_steps": 4048, "loss": 0.5502114295959473, "lr": 1.4581280788177341e-05, "epoch": 0.07364388978129248, "percentage": 3.68, "elapsed_time": "0:32:49", "remaining_time": "14:19:00"} +{"current_steps": 150, "total_steps": 4048, "loss": 0.5243497490882874, "lr": 1.4679802955665026e-05, "epoch": 0.07413814407512666, "percentage": 3.71, "elapsed_time": "0:33:02", "remaining_time": "14:18:29"} +{"current_steps": 151, "total_steps": 4048, "loss": 0.529721736907959, "lr": 1.4778325123152711e-05, "epoch": 0.07463239836896084, "percentage": 3.73, "elapsed_time": "0:33:14", "remaining_time": "14:17:47"} +{"current_steps": 152, "total_steps": 4048, "loss": 0.4721008241176605, "lr": 1.4876847290640396e-05, "epoch": 0.075126652662795, "percentage": 3.75, "elapsed_time": "0:33:26", "remaining_time": "14:17:09"} +{"current_steps": 153, "total_steps": 4048, "loss": 0.46029576659202576, "lr": 1.4975369458128079e-05, "epoch": 0.07562090695662918, "percentage": 3.78, "elapsed_time": "0:33:39", 
"remaining_time": "14:16:41"} +{"current_steps": 154, "total_steps": 4048, "loss": 0.5151746273040771, "lr": 1.5073891625615764e-05, "epoch": 0.07611516125046336, "percentage": 3.8, "elapsed_time": "0:33:51", "remaining_time": "14:16:04"} +{"current_steps": 155, "total_steps": 4048, "loss": 0.4743254780769348, "lr": 1.5172413793103448e-05, "epoch": 0.07660941554429754, "percentage": 3.83, "elapsed_time": "0:34:03", "remaining_time": "14:15:37"} +{"current_steps": 156, "total_steps": 4048, "loss": 0.5167561769485474, "lr": 1.5270935960591133e-05, "epoch": 0.07710366983813172, "percentage": 3.85, "elapsed_time": "0:34:16", "remaining_time": "14:15:09"} +{"current_steps": 157, "total_steps": 4048, "loss": 0.47482365369796753, "lr": 1.536945812807882e-05, "epoch": 0.0775979241319659, "percentage": 3.88, "elapsed_time": "0:34:29", "remaining_time": "14:14:39"} +{"current_steps": 158, "total_steps": 4048, "loss": 0.5088409781455994, "lr": 1.5467980295566506e-05, "epoch": 0.07809217842580007, "percentage": 3.9, "elapsed_time": "0:34:41", "remaining_time": "14:14:02"} +{"current_steps": 159, "total_steps": 4048, "loss": 0.5264201164245605, "lr": 1.556650246305419e-05, "epoch": 0.07858643271963425, "percentage": 3.93, "elapsed_time": "0:34:53", "remaining_time": "14:13:34"} +{"current_steps": 160, "total_steps": 4048, "loss": 0.5475984811782837, "lr": 1.5665024630541875e-05, "epoch": 0.07908068701346843, "percentage": 3.95, "elapsed_time": "0:35:06", "remaining_time": "14:13:01"} +{"current_steps": 161, "total_steps": 4048, "loss": 0.5652282238006592, "lr": 1.576354679802956e-05, "epoch": 0.07957494130730261, "percentage": 3.98, "elapsed_time": "0:35:19", "remaining_time": "14:12:43"} +{"current_steps": 162, "total_steps": 4048, "loss": 0.5179979801177979, "lr": 1.586206896551724e-05, "epoch": 0.08006919560113679, "percentage": 4.0, "elapsed_time": "0:35:31", "remaining_time": "14:12:15"} +{"current_steps": 163, "total_steps": 4048, "loss": 0.4966253638267517, "lr": 
1.5960591133004928e-05, "epoch": 0.08056344989497097, "percentage": 4.03, "elapsed_time": "0:35:44", "remaining_time": "14:11:57"} +{"current_steps": 164, "total_steps": 4048, "loss": 0.5216315388679504, "lr": 1.605911330049261e-05, "epoch": 0.08105770418880515, "percentage": 4.05, "elapsed_time": "0:35:57", "remaining_time": "14:11:31"} +{"current_steps": 165, "total_steps": 4048, "loss": 0.495576411485672, "lr": 1.6157635467980298e-05, "epoch": 0.08155195848263931, "percentage": 4.08, "elapsed_time": "0:36:09", "remaining_time": "14:11:03"} +{"current_steps": 166, "total_steps": 4048, "loss": 0.5101697444915771, "lr": 1.625615763546798e-05, "epoch": 0.08204621277647349, "percentage": 4.1, "elapsed_time": "0:36:22", "remaining_time": "14:10:45"} +{"current_steps": 167, "total_steps": 4048, "loss": 0.5438036918640137, "lr": 1.6354679802955667e-05, "epoch": 0.08254046707030767, "percentage": 4.13, "elapsed_time": "0:36:35", "remaining_time": "14:10:13"} +{"current_steps": 168, "total_steps": 4048, "loss": 0.5043500661849976, "lr": 1.645320197044335e-05, "epoch": 0.08303472136414185, "percentage": 4.15, "elapsed_time": "0:36:48", "remaining_time": "14:10:05"} +{"current_steps": 169, "total_steps": 4048, "loss": 0.5129355788230896, "lr": 1.6551724137931037e-05, "epoch": 0.08352897565797603, "percentage": 4.17, "elapsed_time": "0:37:01", "remaining_time": "14:09:46"} +{"current_steps": 170, "total_steps": 4048, "loss": 0.48643916845321655, "lr": 1.665024630541872e-05, "epoch": 0.0840232299518102, "percentage": 4.2, "elapsed_time": "0:37:14", "remaining_time": "14:09:43"} +{"current_steps": 171, "total_steps": 4048, "loss": 0.5300272703170776, "lr": 1.6748768472906406e-05, "epoch": 0.08451748424564438, "percentage": 4.22, "elapsed_time": "0:37:27", "remaining_time": "14:09:25"} +{"current_steps": 172, "total_steps": 4048, "loss": 0.5321004390716553, "lr": 1.684729064039409e-05, "epoch": 0.08501173853947856, "percentage": 4.25, "elapsed_time": "0:37:41", 
"remaining_time": "14:09:17"} +{"current_steps": 173, "total_steps": 4048, "loss": 0.5066401958465576, "lr": 1.6945812807881776e-05, "epoch": 0.08550599283331274, "percentage": 4.27, "elapsed_time": "0:37:54", "remaining_time": "14:09:00"} +{"current_steps": 174, "total_steps": 4048, "loss": 0.48993563652038574, "lr": 1.704433497536946e-05, "epoch": 0.08600024712714692, "percentage": 4.3, "elapsed_time": "0:38:07", "remaining_time": "14:08:53"} +{"current_steps": 175, "total_steps": 4048, "loss": 0.5468013882637024, "lr": 1.7142857142857142e-05, "epoch": 0.0864945014209811, "percentage": 4.32, "elapsed_time": "0:38:20", "remaining_time": "14:08:35"} +{"current_steps": 176, "total_steps": 4048, "loss": 0.5081865191459656, "lr": 1.7241379310344828e-05, "epoch": 0.08698875571481528, "percentage": 4.35, "elapsed_time": "0:38:34", "remaining_time": "14:08:29"} +{"current_steps": 177, "total_steps": 4048, "loss": 0.48374873399734497, "lr": 1.7339901477832515e-05, "epoch": 0.08748301000864946, "percentage": 4.37, "elapsed_time": "0:38:47", "remaining_time": "14:08:12"} +{"current_steps": 178, "total_steps": 4048, "loss": 0.4929465651512146, "lr": 1.7438423645320198e-05, "epoch": 0.08797726430248362, "percentage": 4.4, "elapsed_time": "0:39:00", "remaining_time": "14:08:07"} +{"current_steps": 179, "total_steps": 4048, "loss": 0.49666428565979004, "lr": 1.7536945812807884e-05, "epoch": 0.0884715185963178, "percentage": 4.42, "elapsed_time": "0:39:13", "remaining_time": "14:07:49"} +{"current_steps": 180, "total_steps": 4048, "loss": 0.5705476403236389, "lr": 1.7635467980295567e-05, "epoch": 0.08896577289015198, "percentage": 4.45, "elapsed_time": "0:39:26", "remaining_time": "14:07:39"} +{"current_steps": 181, "total_steps": 4048, "loss": 0.5466605424880981, "lr": 1.7733990147783254e-05, "epoch": 0.08946002718398616, "percentage": 4.47, "elapsed_time": "0:39:39", "remaining_time": "14:07:15"} +{"current_steps": 182, "total_steps": 4048, "loss": 0.47837337851524353, "lr": 
1.7832512315270937e-05, "epoch": 0.08995428147782034, "percentage": 4.5, "elapsed_time": "0:39:52", "remaining_time": "14:07:06"} +{"current_steps": 183, "total_steps": 4048, "loss": 0.5370041131973267, "lr": 1.7931034482758623e-05, "epoch": 0.09044853577165451, "percentage": 4.52, "elapsed_time": "0:40:05", "remaining_time": "14:06:49"} +{"current_steps": 184, "total_steps": 4048, "loss": 0.540340006351471, "lr": 1.8029556650246306e-05, "epoch": 0.0909427900654887, "percentage": 4.55, "elapsed_time": "0:40:18", "remaining_time": "14:06:27"} +{"current_steps": 185, "total_steps": 4048, "loss": 0.5165396928787231, "lr": 1.8128078817733993e-05, "epoch": 0.09143704435932287, "percentage": 4.57, "elapsed_time": "0:40:31", "remaining_time": "14:06:19"} +{"current_steps": 186, "total_steps": 4048, "loss": 0.5391616821289062, "lr": 1.8226600985221676e-05, "epoch": 0.09193129865315705, "percentage": 4.59, "elapsed_time": "0:40:44", "remaining_time": "14:05:55"} +{"current_steps": 187, "total_steps": 4048, "loss": 0.472774475812912, "lr": 1.8325123152709362e-05, "epoch": 0.09242555294699123, "percentage": 4.62, "elapsed_time": "0:40:57", "remaining_time": "14:05:50"} +{"current_steps": 188, "total_steps": 4048, "loss": 0.5079161524772644, "lr": 1.8423645320197045e-05, "epoch": 0.09291980724082541, "percentage": 4.64, "elapsed_time": "0:41:10", "remaining_time": "14:05:31"} +{"current_steps": 189, "total_steps": 4048, "loss": 0.4909520149230957, "lr": 1.852216748768473e-05, "epoch": 0.09341406153465959, "percentage": 4.67, "elapsed_time": "0:41:24", "remaining_time": "14:05:22"} +{"current_steps": 190, "total_steps": 4048, "loss": 0.5214540362358093, "lr": 1.8620689655172415e-05, "epoch": 0.09390831582849377, "percentage": 4.69, "elapsed_time": "0:41:37", "remaining_time": "14:05:05"} +{"current_steps": 191, "total_steps": 4048, "loss": 0.4820341467857361, "lr": 1.8719211822660098e-05, "epoch": 0.09440257012232794, "percentage": 4.72, "elapsed_time": "0:41:50", 
"remaining_time": "14:05:06"} +{"current_steps": 192, "total_steps": 4048, "loss": 0.5094855427742004, "lr": 1.8817733990147784e-05, "epoch": 0.09489682441616211, "percentage": 4.74, "elapsed_time": "0:42:03", "remaining_time": "14:04:40"} +{"current_steps": 193, "total_steps": 4048, "loss": 0.47840312123298645, "lr": 1.8916256157635468e-05, "epoch": 0.09539107870999629, "percentage": 4.77, "elapsed_time": "0:42:16", "remaining_time": "14:04:23"} +{"current_steps": 194, "total_steps": 4048, "loss": 0.5368070602416992, "lr": 1.9014778325123154e-05, "epoch": 0.09588533300383047, "percentage": 4.79, "elapsed_time": "0:42:28", "remaining_time": "14:03:55"} +{"current_steps": 195, "total_steps": 4048, "loss": 0.493880033493042, "lr": 1.911330049261084e-05, "epoch": 0.09637958729766465, "percentage": 4.82, "elapsed_time": "0:42:41", "remaining_time": "14:03:25"} +{"current_steps": 196, "total_steps": 4048, "loss": 0.5052261352539062, "lr": 1.9211822660098524e-05, "epoch": 0.09687384159149882, "percentage": 4.84, "elapsed_time": "0:42:53", "remaining_time": "14:03:06"} +{"current_steps": 197, "total_steps": 4048, "loss": 0.4817495346069336, "lr": 1.931034482758621e-05, "epoch": 0.097368095885333, "percentage": 4.87, "elapsed_time": "0:43:06", "remaining_time": "14:02:49"} +{"current_steps": 198, "total_steps": 4048, "loss": 0.530259370803833, "lr": 1.9408866995073893e-05, "epoch": 0.09786235017916718, "percentage": 4.89, "elapsed_time": "0:43:20", "remaining_time": "14:02:41"} +{"current_steps": 199, "total_steps": 4048, "loss": 0.4984540045261383, "lr": 1.950738916256158e-05, "epoch": 0.09835660447300136, "percentage": 4.92, "elapsed_time": "0:43:33", "remaining_time": "14:02:25"} +{"current_steps": 200, "total_steps": 4048, "loss": 0.5472708940505981, "lr": 1.9605911330049263e-05, "epoch": 0.09885085876683554, "percentage": 4.94, "elapsed_time": "0:43:46", "remaining_time": "14:02:22"} +{"current_steps": 201, "total_steps": 4048, "loss": 0.5394926071166992, "lr": 
1.970443349753695e-05, "epoch": 0.09934511306066972, "percentage": 4.97, "elapsed_time": "0:44:04", "remaining_time": "14:03:26"} +{"current_steps": 202, "total_steps": 4048, "loss": 0.5299160480499268, "lr": 1.9802955665024632e-05, "epoch": 0.0998393673545039, "percentage": 4.99, "elapsed_time": "0:44:16", "remaining_time": "14:03:00"} +{"current_steps": 203, "total_steps": 4048, "loss": 0.506400465965271, "lr": 1.990147783251232e-05, "epoch": 0.10033362164833808, "percentage": 5.01, "elapsed_time": "0:44:28", "remaining_time": "14:02:29"} +{"current_steps": 204, "total_steps": 4048, "loss": 0.47956231236457825, "lr": 2e-05, "epoch": 0.10082787594217225, "percentage": 5.04, "elapsed_time": "0:44:41", "remaining_time": "14:02:05"} +{"current_steps": 205, "total_steps": 4048, "loss": 0.48805660009384155, "lr": 1.9999996662071442e-05, "epoch": 0.10132213023600642, "percentage": 5.06, "elapsed_time": "0:44:53", "remaining_time": "14:01:36"} +{"current_steps": 206, "total_steps": 4048, "loss": 0.46014025807380676, "lr": 1.9999986648287996e-05, "epoch": 0.1018163845298406, "percentage": 5.09, "elapsed_time": "0:45:05", "remaining_time": "14:01:03"} +{"current_steps": 207, "total_steps": 4048, "loss": 0.4654610753059387, "lr": 1.9999969958656345e-05, "epoch": 0.10231063882367478, "percentage": 5.11, "elapsed_time": "0:45:18", "remaining_time": "14:00:43"} +{"current_steps": 208, "total_steps": 4048, "loss": 0.47037336230278015, "lr": 1.999994659318763e-05, "epoch": 0.10280489311750896, "percentage": 5.14, "elapsed_time": "0:45:30", "remaining_time": "14:00:15"} +{"current_steps": 209, "total_steps": 4048, "loss": 0.4853154718875885, "lr": 1.999991655189745e-05, "epoch": 0.10329914741134313, "percentage": 5.16, "elapsed_time": "0:45:43", "remaining_time": "13:59:57"} +{"current_steps": 210, "total_steps": 4048, "loss": 0.4918109178543091, "lr": 1.9999879834805865e-05, "epoch": 0.10379340170517731, "percentage": 5.19, "elapsed_time": "0:45:56", "remaining_time": 
"13:59:35"} +{"current_steps": 211, "total_steps": 4048, "loss": 0.5136955380439758, "lr": 1.999983644193738e-05, "epoch": 0.10428765599901149, "percentage": 5.21, "elapsed_time": "0:46:09", "remaining_time": "13:59:26"} +{"current_steps": 212, "total_steps": 4048, "loss": 0.5145115852355957, "lr": 1.9999786373320972e-05, "epoch": 0.10478191029284567, "percentage": 5.24, "elapsed_time": "0:46:22", "remaining_time": "13:59:04"} +{"current_steps": 213, "total_steps": 4048, "loss": 0.4624764025211334, "lr": 1.9999729628990058e-05, "epoch": 0.10527616458667985, "percentage": 5.26, "elapsed_time": "0:46:35", "remaining_time": "13:58:59"} +{"current_steps": 214, "total_steps": 4048, "loss": 0.4599718749523163, "lr": 1.9999666208982518e-05, "epoch": 0.10577041888051403, "percentage": 5.29, "elapsed_time": "0:46:48", "remaining_time": "13:58:42"} +{"current_steps": 215, "total_steps": 4048, "loss": 0.4642864465713501, "lr": 1.99995961133407e-05, "epoch": 0.1062646731743482, "percentage": 5.31, "elapsed_time": "0:47:02", "remaining_time": "13:58:30"} +{"current_steps": 216, "total_steps": 4048, "loss": 0.4756677448749542, "lr": 1.9999519342111392e-05, "epoch": 0.10675892746818239, "percentage": 5.34, "elapsed_time": "0:47:14", "remaining_time": "13:58:10"} +{"current_steps": 217, "total_steps": 4048, "loss": 0.4982803463935852, "lr": 1.9999435895345846e-05, "epoch": 0.10725318176201656, "percentage": 5.36, "elapsed_time": "0:47:28", "remaining_time": "13:58:00"} +{"current_steps": 218, "total_steps": 4048, "loss": 0.5189295411109924, "lr": 1.999934577309977e-05, "epoch": 0.10774743605585073, "percentage": 5.39, "elapsed_time": "0:47:40", "remaining_time": "13:57:38"} +{"current_steps": 219, "total_steps": 4048, "loss": 0.5077873468399048, "lr": 1.999924897543333e-05, "epoch": 0.10824169034968491, "percentage": 5.41, "elapsed_time": "0:47:53", "remaining_time": "13:57:27"} +{"current_steps": 220, "total_steps": 4048, "loss": 0.5510451793670654, "lr": 1.9999145502411148e-05, 
"epoch": 0.10873594464351909, "percentage": 5.43, "elapsed_time": "0:48:06", "remaining_time": "13:57:07"} +{"current_steps": 221, "total_steps": 4048, "loss": 0.44604551792144775, "lr": 1.9999035354102298e-05, "epoch": 0.10923019893735327, "percentage": 5.46, "elapsed_time": "0:48:20", "remaining_time": "13:57:01"} +{"current_steps": 222, "total_steps": 4048, "loss": 0.42567160725593567, "lr": 1.9998918530580315e-05, "epoch": 0.10972445323118744, "percentage": 5.48, "elapsed_time": "0:48:33", "remaining_time": "13:56:47"} +{"current_steps": 223, "total_steps": 4048, "loss": 0.4622190594673157, "lr": 1.9998795031923186e-05, "epoch": 0.11021870752502162, "percentage": 5.51, "elapsed_time": "0:48:46", "remaining_time": "13:56:42"} +{"current_steps": 224, "total_steps": 4048, "loss": 0.5023611783981323, "lr": 1.999866485821336e-05, "epoch": 0.1107129618188558, "percentage": 5.53, "elapsed_time": "0:49:00", "remaining_time": "13:56:33"} +{"current_steps": 225, "total_steps": 4048, "loss": 0.451701819896698, "lr": 1.9998528009537735e-05, "epoch": 0.11120721611268998, "percentage": 5.56, "elapsed_time": "0:49:13", "remaining_time": "13:56:21"} +{"current_steps": 226, "total_steps": 4048, "loss": 0.48493725061416626, "lr": 1.9998384485987675e-05, "epoch": 0.11170147040652416, "percentage": 5.58, "elapsed_time": "0:49:26", "remaining_time": "13:56:13"} +{"current_steps": 227, "total_steps": 4048, "loss": 0.45377853512763977, "lr": 1.9998234287658996e-05, "epoch": 0.11219572470035834, "percentage": 5.61, "elapsed_time": "0:49:39", "remaining_time": "13:55:57"} +{"current_steps": 228, "total_steps": 4048, "loss": 0.48963701725006104, "lr": 1.9998077414651957e-05, "epoch": 0.11268997899419252, "percentage": 5.63, "elapsed_time": "0:49:53", "remaining_time": "13:55:48"} +{"current_steps": 229, "total_steps": 4048, "loss": 0.47935402393341064, "lr": 1.9997913867071296e-05, "epoch": 0.1131842332880267, "percentage": 5.66, "elapsed_time": "0:50:06", "remaining_time": "13:55:34"} 
+{"current_steps": 230, "total_steps": 4048, "loss": 0.46203523874282837, "lr": 1.999774364502619e-05, "epoch": 0.11367848758186087, "percentage": 5.68, "elapsed_time": "0:50:19", "remaining_time": "13:55:26"} +{"current_steps": 231, "total_steps": 4048, "loss": 0.4411412179470062, "lr": 1.9997566748630274e-05, "epoch": 0.11417274187569504, "percentage": 5.71, "elapsed_time": "0:50:32", "remaining_time": "13:55:08"} +{"current_steps": 232, "total_steps": 4048, "loss": 0.44424787163734436, "lr": 1.9997383178001646e-05, "epoch": 0.11466699616952922, "percentage": 5.73, "elapsed_time": "0:50:46", "remaining_time": "13:55:01"} +{"current_steps": 233, "total_steps": 4048, "loss": 0.4862042963504791, "lr": 1.9997192933262853e-05, "epoch": 0.1151612504633634, "percentage": 5.76, "elapsed_time": "0:50:58", "remaining_time": "13:54:40"} +{"current_steps": 234, "total_steps": 4048, "loss": 0.49599340558052063, "lr": 1.99969960145409e-05, "epoch": 0.11565550475719757, "percentage": 5.78, "elapsed_time": "0:51:12", "remaining_time": "13:54:32"} +{"current_steps": 235, "total_steps": 4048, "loss": 0.49702027440071106, "lr": 1.999679242196725e-05, "epoch": 0.11614975905103175, "percentage": 5.81, "elapsed_time": "0:51:24", "remaining_time": "13:54:09"} +{"current_steps": 236, "total_steps": 4048, "loss": 0.520037829875946, "lr": 1.9996582155677813e-05, "epoch": 0.11664401334486593, "percentage": 5.83, "elapsed_time": "0:51:37", "remaining_time": "13:53:50"} +{"current_steps": 237, "total_steps": 4048, "loss": 0.4571160674095154, "lr": 1.999636521581296e-05, "epoch": 0.11713826763870011, "percentage": 5.85, "elapsed_time": "0:51:50", "remaining_time": "13:53:37"} +{"current_steps": 238, "total_steps": 4048, "loss": 0.45602840185165405, "lr": 1.9996141602517526e-05, "epoch": 0.11763252193253429, "percentage": 5.88, "elapsed_time": "0:52:03", "remaining_time": "13:53:15"} +{"current_steps": 239, "total_steps": 4048, "loss": 0.4909728169441223, "lr": 1.999591131594078e-05, "epoch": 
0.11812677622636847, "percentage": 5.9, "elapsed_time": "0:52:16", "remaining_time": "13:53:05"} +{"current_steps": 240, "total_steps": 4048, "loss": 0.47716090083122253, "lr": 1.9995674356236468e-05, "epoch": 0.11862103052020265, "percentage": 5.93, "elapsed_time": "0:52:28", "remaining_time": "13:52:40"} +{"current_steps": 241, "total_steps": 4048, "loss": 0.4449527859687805, "lr": 1.9995430723562774e-05, "epoch": 0.11911528481403683, "percentage": 5.95, "elapsed_time": "0:52:41", "remaining_time": "13:52:22"} +{"current_steps": 242, "total_steps": 4048, "loss": 0.49069035053253174, "lr": 1.9995180418082347e-05, "epoch": 0.119609539107871, "percentage": 5.98, "elapsed_time": "0:52:53", "remaining_time": "13:51:58"} +{"current_steps": 243, "total_steps": 4048, "loss": 0.506738543510437, "lr": 1.9994923439962286e-05, "epoch": 0.12010379340170518, "percentage": 6.0, "elapsed_time": "0:53:07", "remaining_time": "13:51:49"} +{"current_steps": 244, "total_steps": 4048, "loss": 0.38516658544540405, "lr": 1.9994659789374145e-05, "epoch": 0.12059804769553935, "percentage": 6.03, "elapsed_time": "0:53:19", "remaining_time": "13:51:25"} +{"current_steps": 245, "total_steps": 4048, "loss": 0.49539780616760254, "lr": 1.9994389466493942e-05, "epoch": 0.12109230198937353, "percentage": 6.05, "elapsed_time": "0:53:32", "remaining_time": "13:51:06"} +{"current_steps": 246, "total_steps": 4048, "loss": 0.4400706887245178, "lr": 1.999411247150213e-05, "epoch": 0.1215865562832077, "percentage": 6.08, "elapsed_time": "0:53:45", "remaining_time": "13:50:45"} +{"current_steps": 247, "total_steps": 4048, "loss": 0.48815736174583435, "lr": 1.9993828804583625e-05, "epoch": 0.12208081057704188, "percentage": 6.1, "elapsed_time": "0:53:57", "remaining_time": "13:50:26"} +{"current_steps": 248, "total_steps": 4048, "loss": 0.42744773626327515, "lr": 1.999353846592781e-05, "epoch": 0.12257506487087606, "percentage": 6.13, "elapsed_time": "0:54:10", "remaining_time": "13:50:05"} 
+{"current_steps": 249, "total_steps": 4048, "loss": 0.4370969235897064, "lr": 1.9993241455728505e-05, "epoch": 0.12306931916471024, "percentage": 6.15, "elapsed_time": "0:54:23", "remaining_time": "13:49:52"} +{"current_steps": 250, "total_steps": 4048, "loss": 0.4803960621356964, "lr": 1.9992937774183988e-05, "epoch": 0.12356357345854442, "percentage": 6.18, "elapsed_time": "0:54:36", "remaining_time": "13:49:40"} +{"current_steps": 251, "total_steps": 4048, "loss": 0.4614640474319458, "lr": 1.9992627421496994e-05, "epoch": 0.1240578277523786, "percentage": 6.2, "elapsed_time": "0:54:49", "remaining_time": "13:49:22"} +{"current_steps": 252, "total_steps": 4048, "loss": 0.46626490354537964, "lr": 1.9992310397874715e-05, "epoch": 0.12455208204621278, "percentage": 6.23, "elapsed_time": "0:55:02", "remaining_time": "13:49:12"} +{"current_steps": 253, "total_steps": 4048, "loss": 0.4812886416912079, "lr": 1.9991986703528784e-05, "epoch": 0.12504633634004694, "percentage": 6.25, "elapsed_time": "0:55:15", "remaining_time": "13:48:51"} +{"current_steps": 254, "total_steps": 4048, "loss": 0.45037686824798584, "lr": 1.99916563386753e-05, "epoch": 0.12554059063388112, "percentage": 6.27, "elapsed_time": "0:55:28", "remaining_time": "13:48:40"} +{"current_steps": 255, "total_steps": 4048, "loss": 0.48492124676704407, "lr": 1.9991319303534804e-05, "epoch": 0.1260348449277153, "percentage": 6.3, "elapsed_time": "0:55:41", "remaining_time": "13:48:19"} +{"current_steps": 256, "total_steps": 4048, "loss": 0.48825496435165405, "lr": 1.9990975598332304e-05, "epoch": 0.12652909922154948, "percentage": 6.32, "elapsed_time": "0:55:54", "remaining_time": "13:48:08"} +{"current_steps": 257, "total_steps": 4048, "loss": 0.4836634695529938, "lr": 1.9990625223297244e-05, "epoch": 0.12702335351538366, "percentage": 6.35, "elapsed_time": "0:56:07", "remaining_time": "13:47:54"} +{"current_steps": 258, "total_steps": 4048, "loss": 0.4632943272590637, "lr": 1.9990268178663538e-05, "epoch": 
0.12751760780921784, "percentage": 6.37, "elapsed_time": "0:56:21", "remaining_time": "13:47:48"} +{"current_steps": 259, "total_steps": 4048, "loss": 0.4601137042045593, "lr": 1.9989904464669533e-05, "epoch": 0.12801186210305202, "percentage": 6.4, "elapsed_time": "0:56:34", "remaining_time": "13:47:35"} +{"current_steps": 260, "total_steps": 4048, "loss": 0.4390139579772949, "lr": 1.998953408155805e-05, "epoch": 0.1285061163968862, "percentage": 6.42, "elapsed_time": "0:56:47", "remaining_time": "13:47:27"} +{"current_steps": 261, "total_steps": 4048, "loss": 0.45749080181121826, "lr": 1.9989157029576348e-05, "epoch": 0.12900037069072037, "percentage": 6.45, "elapsed_time": "0:57:00", "remaining_time": "13:47:13"} +{"current_steps": 262, "total_steps": 4048, "loss": 0.4490616023540497, "lr": 1.998877330897614e-05, "epoch": 0.12949462498455455, "percentage": 6.47, "elapsed_time": "0:57:14", "remaining_time": "13:47:04"} +{"current_steps": 263, "total_steps": 4048, "loss": 0.4819987714290619, "lr": 1.998838292001359e-05, "epoch": 0.12998887927838873, "percentage": 6.5, "elapsed_time": "0:57:27", "remaining_time": "13:46:50"} +{"current_steps": 264, "total_steps": 4048, "loss": 0.4448384940624237, "lr": 1.9987985862949325e-05, "epoch": 0.1304831335722229, "percentage": 6.52, "elapsed_time": "0:57:40", "remaining_time": "13:46:34"} +{"current_steps": 265, "total_steps": 4048, "loss": 0.4574149549007416, "lr": 1.9987582138048405e-05, "epoch": 0.1309773878660571, "percentage": 6.55, "elapsed_time": "0:57:53", "remaining_time": "13:46:25"} +{"current_steps": 266, "total_steps": 4048, "loss": 0.4765186607837677, "lr": 1.9987171745580353e-05, "epoch": 0.13147164215989127, "percentage": 6.57, "elapsed_time": "0:58:06", "remaining_time": "13:46:13"} +{"current_steps": 267, "total_steps": 4048, "loss": 0.4900081753730774, "lr": 1.998675468581915e-05, "epoch": 0.13196589645372545, "percentage": 6.6, "elapsed_time": "0:58:20", "remaining_time": "13:46:08"} +{"current_steps": 
268, "total_steps": 4048, "loss": 0.433933287858963, "lr": 1.9986330959043206e-05, "epoch": 0.13246015074755962, "percentage": 6.62, "elapsed_time": "0:58:33", "remaining_time": "13:45:51"} +{"current_steps": 269, "total_steps": 4048, "loss": 0.452491819858551, "lr": 1.9985900565535403e-05, "epoch": 0.1329544050413938, "percentage": 6.65, "elapsed_time": "0:58:46", "remaining_time": "13:45:44"} +{"current_steps": 270, "total_steps": 4048, "loss": 0.4583294987678528, "lr": 1.9985463505583062e-05, "epoch": 0.13344865933522798, "percentage": 6.67, "elapsed_time": "0:58:59", "remaining_time": "13:45:28"} +{"current_steps": 271, "total_steps": 4048, "loss": 0.43183961510658264, "lr": 1.9985019779477958e-05, "epoch": 0.13394291362906216, "percentage": 6.69, "elapsed_time": "0:59:13", "remaining_time": "13:45:22"} +{"current_steps": 272, "total_steps": 4048, "loss": 0.48075324296951294, "lr": 1.998456938751632e-05, "epoch": 0.13443716792289634, "percentage": 6.72, "elapsed_time": "0:59:26", "remaining_time": "13:45:11"} +{"current_steps": 273, "total_steps": 4048, "loss": 0.5131007432937622, "lr": 1.9984112329998825e-05, "epoch": 0.13493142221673052, "percentage": 6.74, "elapsed_time": "0:59:39", "remaining_time": "13:45:03"} +{"current_steps": 274, "total_steps": 4048, "loss": 0.4841446876525879, "lr": 1.998364860723059e-05, "epoch": 0.1354256765105647, "percentage": 6.77, "elapsed_time": "0:59:52", "remaining_time": "13:44:47"} +{"current_steps": 275, "total_steps": 4048, "loss": 0.5001078248023987, "lr": 1.9983178219521194e-05, "epoch": 0.13591993080439888, "percentage": 6.79, "elapsed_time": "1:00:06", "remaining_time": "13:44:42"} +{"current_steps": 276, "total_steps": 4048, "loss": 0.44851893186569214, "lr": 1.998270116718466e-05, "epoch": 0.13641418509823305, "percentage": 6.82, "elapsed_time": "1:00:19", "remaining_time": "13:44:26"} +{"current_steps": 277, "total_steps": 4048, "loss": 0.4635714888572693, "lr": 1.9982217450539464e-05, "epoch": 0.1369084393920672, 
"percentage": 6.84, "elapsed_time": "1:00:32", "remaining_time": "13:44:13"} +{"current_steps": 278, "total_steps": 4048, "loss": 0.4171838164329529, "lr": 1.9981727069908525e-05, "epoch": 0.13740269368590138, "percentage": 6.87, "elapsed_time": "1:00:46", "remaining_time": "13:44:04"} +{"current_steps": 279, "total_steps": 4048, "loss": 0.4819942116737366, "lr": 1.9981230025619216e-05, "epoch": 0.13789694797973556, "percentage": 6.89, "elapsed_time": "1:00:59", "remaining_time": "13:43:50"} +{"current_steps": 280, "total_steps": 4048, "loss": 0.47878971695899963, "lr": 1.998072631800336e-05, "epoch": 0.13839120227356974, "percentage": 6.92, "elapsed_time": "1:01:12", "remaining_time": "13:43:40"} +{"current_steps": 281, "total_steps": 4048, "loss": 0.4436519145965576, "lr": 1.9980215947397217e-05, "epoch": 0.13888545656740392, "percentage": 6.94, "elapsed_time": "1:01:25", "remaining_time": "13:43:25"} +{"current_steps": 282, "total_steps": 4048, "loss": 0.4633050262928009, "lr": 1.9979698914141507e-05, "epoch": 0.1393797108612381, "percentage": 6.97, "elapsed_time": "1:01:38", "remaining_time": "13:43:15"} +{"current_steps": 283, "total_steps": 4048, "loss": 0.4264826774597168, "lr": 1.9979175218581397e-05, "epoch": 0.13987396515507228, "percentage": 6.99, "elapsed_time": "1:01:51", "remaining_time": "13:43:00"} +{"current_steps": 284, "total_steps": 4048, "loss": 0.47763916850090027, "lr": 1.9978644861066493e-05, "epoch": 0.14036821944890646, "percentage": 7.02, "elapsed_time": "1:02:05", "remaining_time": "13:42:56"} +{"current_steps": 285, "total_steps": 4048, "loss": 0.44895434379577637, "lr": 1.997810784195086e-05, "epoch": 0.14086247374274063, "percentage": 7.04, "elapsed_time": "1:02:18", "remaining_time": "13:42:41"} +{"current_steps": 286, "total_steps": 4048, "loss": 0.4287600517272949, "lr": 1.9977564161593e-05, "epoch": 0.1413567280365748, "percentage": 7.07, "elapsed_time": "1:02:31", "remaining_time": "13:42:32"} +{"current_steps": 287, 
"total_steps": 4048, "loss": 0.44175297021865845, "lr": 1.997701382035587e-05, "epoch": 0.141850982330409, "percentage": 7.09, "elapsed_time": "1:02:44", "remaining_time": "13:42:16"} +{"current_steps": 288, "total_steps": 4048, "loss": 0.4393232464790344, "lr": 1.9976456818606868e-05, "epoch": 0.14234523662424317, "percentage": 7.11, "elapsed_time": "1:02:58", "remaining_time": "13:42:12"} +{"current_steps": 289, "total_steps": 4048, "loss": 0.4600023329257965, "lr": 1.9975893156717836e-05, "epoch": 0.14283949091807735, "percentage": 7.14, "elapsed_time": "1:03:11", "remaining_time": "13:41:56"} +{"current_steps": 290, "total_steps": 4048, "loss": 0.4819300174713135, "lr": 1.9975322835065075e-05, "epoch": 0.14333374521191153, "percentage": 7.16, "elapsed_time": "1:03:24", "remaining_time": "13:41:45"} +{"current_steps": 291, "total_steps": 4048, "loss": 0.4391498267650604, "lr": 1.9974745854029318e-05, "epoch": 0.1438279995057457, "percentage": 7.19, "elapsed_time": "1:03:38", "remaining_time": "13:41:37"} +{"current_steps": 292, "total_steps": 4048, "loss": 0.43435904383659363, "lr": 1.9974162213995748e-05, "epoch": 0.14432225379957989, "percentage": 7.21, "elapsed_time": "1:03:51", "remaining_time": "13:41:29"} +{"current_steps": 293, "total_steps": 4048, "loss": 0.43575727939605713, "lr": 1.9973571915354e-05, "epoch": 0.14481650809341406, "percentage": 7.24, "elapsed_time": "1:04:04", "remaining_time": "13:41:14"} +{"current_steps": 294, "total_steps": 4048, "loss": 0.39998459815979004, "lr": 1.9972974958498145e-05, "epoch": 0.14531076238724824, "percentage": 7.26, "elapsed_time": "1:04:18", "remaining_time": "13:41:08"} +{"current_steps": 295, "total_steps": 4048, "loss": 0.4620361030101776, "lr": 1.9972371343826705e-05, "epoch": 0.14580501668108242, "percentage": 7.29, "elapsed_time": "1:04:31", "remaining_time": "13:40:58"} +{"current_steps": 296, "total_steps": 4048, "loss": 0.5172264575958252, "lr": 1.9971761071742644e-05, "epoch": 0.1462992709749166, 
"percentage": 7.31, "elapsed_time": "1:04:44", "remaining_time": "13:40:44"} +{"current_steps": 297, "total_steps": 4048, "loss": 0.4685489535331726, "lr": 1.997114414265337e-05, "epoch": 0.14679352526875078, "percentage": 7.34, "elapsed_time": "1:04:58", "remaining_time": "13:40:41"} +{"current_steps": 298, "total_steps": 4048, "loss": 0.4346499741077423, "lr": 1.9970520556970735e-05, "epoch": 0.14728777956258496, "percentage": 7.36, "elapsed_time": "1:05:11", "remaining_time": "13:40:20"} +{"current_steps": 299, "total_steps": 4048, "loss": 0.4051141142845154, "lr": 1.996989031511104e-05, "epoch": 0.14778203385641914, "percentage": 7.39, "elapsed_time": "1:05:24", "remaining_time": "13:40:08"} +{"current_steps": 300, "total_steps": 4048, "loss": 0.4862591028213501, "lr": 1.996925341749502e-05, "epoch": 0.14827628815025332, "percentage": 7.41, "elapsed_time": "1:05:37", "remaining_time": "13:39:52"} +{"current_steps": 301, "total_steps": 4048, "loss": 0.44075754284858704, "lr": 1.996860986454787e-05, "epoch": 0.1487705424440875, "percentage": 7.44, "elapsed_time": "1:05:57", "remaining_time": "13:41:05"} +{"current_steps": 302, "total_steps": 4048, "loss": 0.44321805238723755, "lr": 1.99679596566992e-05, "epoch": 0.14926479673792167, "percentage": 7.46, "elapsed_time": "1:06:10", "remaining_time": "13:40:49"} +{"current_steps": 303, "total_steps": 4048, "loss": 0.4468157887458801, "lr": 1.996730279438309e-05, "epoch": 0.14975905103175585, "percentage": 7.49, "elapsed_time": "1:06:24", "remaining_time": "13:40:42"} +{"current_steps": 304, "total_steps": 4048, "loss": 0.48698270320892334, "lr": 1.996663927803805e-05, "epoch": 0.15025330532559, "percentage": 7.51, "elapsed_time": "1:06:36", "remaining_time": "13:40:26"} +{"current_steps": 305, "total_steps": 4048, "loss": 0.41898253560066223, "lr": 1.9965969108107032e-05, "epoch": 0.15074755961942418, "percentage": 7.53, "elapsed_time": "1:06:50", "remaining_time": "13:40:17"} +{"current_steps": 306, "total_steps": 
4048, "loss": 0.4827130436897278, "lr": 1.9965292285037437e-05, "epoch": 0.15124181391325836, "percentage": 7.56, "elapsed_time": "1:07:03", "remaining_time": "13:40:06"} +{"current_steps": 307, "total_steps": 4048, "loss": 0.4219037592411041, "lr": 1.99646088092811e-05, "epoch": 0.15173606820709254, "percentage": 7.58, "elapsed_time": "1:07:17", "remaining_time": "13:39:57"} +{"current_steps": 308, "total_steps": 4048, "loss": 0.4431123733520508, "lr": 1.9963918681294298e-05, "epoch": 0.15223032250092672, "percentage": 7.61, "elapsed_time": "1:07:29", "remaining_time": "13:39:37"} +{"current_steps": 309, "total_steps": 4048, "loss": 0.4161941409111023, "lr": 1.996322190153775e-05, "epoch": 0.1527245767947609, "percentage": 7.63, "elapsed_time": "1:07:43", "remaining_time": "13:39:24"} +{"current_steps": 310, "total_steps": 4048, "loss": 0.4774768650531769, "lr": 1.9962518470476617e-05, "epoch": 0.15321883108859508, "percentage": 7.66, "elapsed_time": "1:07:55", "remaining_time": "13:39:03"} +{"current_steps": 311, "total_steps": 4048, "loss": 0.4196036159992218, "lr": 1.9961808388580503e-05, "epoch": 0.15371308538242925, "percentage": 7.68, "elapsed_time": "1:08:09", "remaining_time": "13:38:55"} +{"current_steps": 312, "total_steps": 4048, "loss": 0.44241398572921753, "lr": 1.996109165632344e-05, "epoch": 0.15420733967626343, "percentage": 7.71, "elapsed_time": "1:08:21", "remaining_time": "13:38:36"} +{"current_steps": 313, "total_steps": 4048, "loss": 0.47662627696990967, "lr": 1.996036827418392e-05, "epoch": 0.1547015939700976, "percentage": 7.73, "elapsed_time": "1:08:34", "remaining_time": "13:38:21"} +{"current_steps": 314, "total_steps": 4048, "loss": 0.4241487979888916, "lr": 1.9959638242644855e-05, "epoch": 0.1551958482639318, "percentage": 7.76, "elapsed_time": "1:08:48", "remaining_time": "13:38:10"} +{"current_steps": 315, "total_steps": 4048, "loss": 0.45686113834381104, "lr": 1.9958901562193605e-05, "epoch": 0.15569010255776597, "percentage": 7.78, 
"elapsed_time": "1:09:01", "remaining_time": "13:37:55"} +{"current_steps": 316, "total_steps": 4048, "loss": 0.4154825806617737, "lr": 1.9958158233321968e-05, "epoch": 0.15618435685160015, "percentage": 7.81, "elapsed_time": "1:09:13", "remaining_time": "13:37:37"} +{"current_steps": 317, "total_steps": 4048, "loss": 0.4705435037612915, "lr": 1.9957408256526176e-05, "epoch": 0.15667861114543433, "percentage": 7.83, "elapsed_time": "1:09:26", "remaining_time": "13:37:18"} +{"current_steps": 318, "total_steps": 4048, "loss": 0.4367898404598236, "lr": 1.9956651632306908e-05, "epoch": 0.1571728654392685, "percentage": 7.86, "elapsed_time": "1:09:39", "remaining_time": "13:37:01"} +{"current_steps": 319, "total_steps": 4048, "loss": 0.4668901264667511, "lr": 1.9955888361169272e-05, "epoch": 0.15766711973310268, "percentage": 7.88, "elapsed_time": "1:09:51", "remaining_time": "13:36:37"} +{"current_steps": 320, "total_steps": 4048, "loss": 0.46429356932640076, "lr": 1.995511844362282e-05, "epoch": 0.15816137402693686, "percentage": 7.91, "elapsed_time": "1:10:05", "remaining_time": "13:36:28"} +{"current_steps": 321, "total_steps": 4048, "loss": 0.4582952857017517, "lr": 1.9954341880181536e-05, "epoch": 0.15865562832077104, "percentage": 7.93, "elapsed_time": "1:10:17", "remaining_time": "13:36:12"} +{"current_steps": 322, "total_steps": 4048, "loss": 0.45110762119293213, "lr": 1.9953558671363843e-05, "epoch": 0.15914988261460522, "percentage": 7.95, "elapsed_time": "1:10:31", "remaining_time": "13:35:59"} +{"current_steps": 323, "total_steps": 4048, "loss": 0.4049065113067627, "lr": 1.99527688176926e-05, "epoch": 0.1596441369084394, "percentage": 7.98, "elapsed_time": "1:10:43", "remaining_time": "13:35:39"} +{"current_steps": 324, "total_steps": 4048, "loss": 0.40884825587272644, "lr": 1.9951972319695105e-05, "epoch": 0.16013839120227358, "percentage": 8.0, "elapsed_time": "1:10:57", "remaining_time": "13:35:30"} +{"current_steps": 325, "total_steps": 4048, "loss": 
0.4416786730289459, "lr": 1.9951169177903084e-05, "epoch": 0.16063264549610776, "percentage": 8.03, "elapsed_time": "1:11:09", "remaining_time": "13:35:10"} +{"current_steps": 326, "total_steps": 4048, "loss": 0.4318765103816986, "lr": 1.9950359392852704e-05, "epoch": 0.16112689978994194, "percentage": 8.05, "elapsed_time": "1:11:23", "remaining_time": "13:35:01"} +{"current_steps": 327, "total_steps": 4048, "loss": 0.4415965974330902, "lr": 1.9949542965084564e-05, "epoch": 0.16162115408377611, "percentage": 8.08, "elapsed_time": "1:11:35", "remaining_time": "13:34:42"} +{"current_steps": 328, "total_steps": 4048, "loss": 0.4816298186779022, "lr": 1.9948719895143703e-05, "epoch": 0.1621154083776103, "percentage": 8.1, "elapsed_time": "1:11:48", "remaining_time": "13:34:27"} +{"current_steps": 329, "total_steps": 4048, "loss": 0.4329088032245636, "lr": 1.9947890183579594e-05, "epoch": 0.16260966267144447, "percentage": 8.13, "elapsed_time": "1:12:01", "remaining_time": "13:34:07"} +{"current_steps": 330, "total_steps": 4048, "loss": 0.43193015456199646, "lr": 1.9947053830946134e-05, "epoch": 0.16310391696527862, "percentage": 8.15, "elapsed_time": "1:12:13", "remaining_time": "13:33:42"} +{"current_steps": 331, "total_steps": 4048, "loss": 0.48738086223602295, "lr": 1.994621083780166e-05, "epoch": 0.1635981712591128, "percentage": 8.18, "elapsed_time": "1:12:26", "remaining_time": "13:33:30"} +{"current_steps": 332, "total_steps": 4048, "loss": 0.4707815647125244, "lr": 1.9945361204708948e-05, "epoch": 0.16409242555294698, "percentage": 8.2, "elapsed_time": "1:12:39", "remaining_time": "13:33:11"} +{"current_steps": 333, "total_steps": 4048, "loss": 0.4190637469291687, "lr": 1.9944504932235198e-05, "epoch": 0.16458667984678116, "percentage": 8.23, "elapsed_time": "1:12:52", "remaining_time": "13:33:05"} +{"current_steps": 334, "total_steps": 4048, "loss": 0.45955735445022583, "lr": 1.9943642020952042e-05, "epoch": 0.16508093414061534, "percentage": 8.25, 
"elapsed_time": "1:13:05", "remaining_time": "13:32:47"} +{"current_steps": 335, "total_steps": 4048, "loss": 0.4675702750682831, "lr": 1.9942772471435555e-05, "epoch": 0.16557518843444952, "percentage": 8.28, "elapsed_time": "1:13:18", "remaining_time": "13:32:34"} +{"current_steps": 336, "total_steps": 4048, "loss": 0.42571327090263367, "lr": 1.9941896284266224e-05, "epoch": 0.1660694427282837, "percentage": 8.3, "elapsed_time": "1:13:31", "remaining_time": "13:32:17"} +{"current_steps": 337, "total_steps": 4048, "loss": 0.4341443181037903, "lr": 1.994101346002899e-05, "epoch": 0.16656369702211787, "percentage": 8.33, "elapsed_time": "1:13:44", "remaining_time": "13:32:06"} +{"current_steps": 338, "total_steps": 4048, "loss": 0.4473035931587219, "lr": 1.9940123999313214e-05, "epoch": 0.16705795131595205, "percentage": 8.35, "elapsed_time": "1:13:57", "remaining_time": "13:31:46"} +{"current_steps": 339, "total_steps": 4048, "loss": 0.4692152142524719, "lr": 1.9939227902712676e-05, "epoch": 0.16755220560978623, "percentage": 8.37, "elapsed_time": "1:14:10", "remaining_time": "13:31:34"} +{"current_steps": 340, "total_steps": 4048, "loss": 0.4169067442417145, "lr": 1.9938325170825607e-05, "epoch": 0.1680464599036204, "percentage": 8.4, "elapsed_time": "1:14:23", "remaining_time": "13:31:18"} +{"current_steps": 341, "total_steps": 4048, "loss": 0.451092928647995, "lr": 1.9937415804254657e-05, "epoch": 0.1685407141974546, "percentage": 8.42, "elapsed_time": "1:14:37", "remaining_time": "13:31:09"} +{"current_steps": 342, "total_steps": 4048, "loss": 0.39640212059020996, "lr": 1.99364998036069e-05, "epoch": 0.16903496849128877, "percentage": 8.45, "elapsed_time": "1:14:49", "remaining_time": "13:30:53"} +{"current_steps": 343, "total_steps": 4048, "loss": 0.46396374702453613, "lr": 1.9935577169493854e-05, "epoch": 0.16952922278512295, "percentage": 8.47, "elapsed_time": "1:15:03", "remaining_time": "13:30:40"} +{"current_steps": 344, "total_steps": 4048, "loss": 
0.4343748390674591, "lr": 1.9934647902531453e-05, "epoch": 0.17002347707895712, "percentage": 8.5, "elapsed_time": "1:15:15", "remaining_time": "13:30:21"} +{"current_steps": 345, "total_steps": 4048, "loss": 0.4353589713573456, "lr": 1.9933712003340056e-05, "epoch": 0.1705177313727913, "percentage": 8.52, "elapsed_time": "1:15:28", "remaining_time": "13:30:10"} +{"current_steps": 346, "total_steps": 4048, "loss": 0.4423677623271942, "lr": 1.9932769472544464e-05, "epoch": 0.17101198566662548, "percentage": 8.55, "elapsed_time": "1:15:41", "remaining_time": "13:29:53"} +{"current_steps": 347, "total_steps": 4048, "loss": 0.4382045865058899, "lr": 1.9931820310773894e-05, "epoch": 0.17150623996045966, "percentage": 8.57, "elapsed_time": "1:15:54", "remaining_time": "13:29:38"} +{"current_steps": 348, "total_steps": 4048, "loss": 0.3966183066368103, "lr": 1.993086451866199e-05, "epoch": 0.17200049425429384, "percentage": 8.6, "elapsed_time": "1:16:07", "remaining_time": "13:29:24"} +{"current_steps": 349, "total_steps": 4048, "loss": 0.48624011874198914, "lr": 1.9929902096846833e-05, "epoch": 0.17249474854812802, "percentage": 8.62, "elapsed_time": "1:16:20", "remaining_time": "13:29:08"} +{"current_steps": 350, "total_steps": 4048, "loss": 0.4442569315433502, "lr": 1.9928933045970913e-05, "epoch": 0.1729890028419622, "percentage": 8.65, "elapsed_time": "1:16:33", "remaining_time": "13:28:57"} +{"current_steps": 351, "total_steps": 4048, "loss": 0.42499929666519165, "lr": 1.992795736668116e-05, "epoch": 0.17348325713579638, "percentage": 8.67, "elapsed_time": "1:16:46", "remaining_time": "13:28:40"} +{"current_steps": 352, "total_steps": 4048, "loss": 0.4230741858482361, "lr": 1.9926975059628923e-05, "epoch": 0.17397751142963055, "percentage": 8.7, "elapsed_time": "1:16:59", "remaining_time": "13:28:29"} +{"current_steps": 353, "total_steps": 4048, "loss": 0.4273882806301117, "lr": 1.9925986125469974e-05, "epoch": 0.17447176572346473, "percentage": 8.72, 
"elapsed_time": "1:17:12", "remaining_time": "13:28:11"} +{"current_steps": 354, "total_steps": 4048, "loss": 0.45237618684768677, "lr": 1.9924990564864513e-05, "epoch": 0.1749660200172989, "percentage": 8.75, "elapsed_time": "1:17:25", "remaining_time": "13:27:57"} +{"current_steps": 355, "total_steps": 4048, "loss": 0.4115524888038635, "lr": 1.9923988378477165e-05, "epoch": 0.1754602743111331, "percentage": 8.77, "elapsed_time": "1:17:38", "remaining_time": "13:27:39"} +{"current_steps": 356, "total_steps": 4048, "loss": 0.4476633071899414, "lr": 1.9922979566976968e-05, "epoch": 0.17595452860496724, "percentage": 8.79, "elapsed_time": "1:17:51", "remaining_time": "13:27:26"} +{"current_steps": 357, "total_steps": 4048, "loss": 0.44930100440979004, "lr": 1.9921964131037398e-05, "epoch": 0.17644878289880142, "percentage": 8.82, "elapsed_time": "1:18:04", "remaining_time": "13:27:09"} +{"current_steps": 358, "total_steps": 4048, "loss": 0.4714374244213104, "lr": 1.9920942071336338e-05, "epoch": 0.1769430371926356, "percentage": 8.84, "elapsed_time": "1:18:17", "remaining_time": "13:26:59"} +{"current_steps": 359, "total_steps": 4048, "loss": 0.47696003317832947, "lr": 1.9919913388556105e-05, "epoch": 0.17743729148646978, "percentage": 8.87, "elapsed_time": "1:18:30", "remaining_time": "13:26:38"} +{"current_steps": 360, "total_steps": 4048, "loss": 0.44937074184417725, "lr": 1.9918878083383434e-05, "epoch": 0.17793154578030396, "percentage": 8.89, "elapsed_time": "1:18:42", "remaining_time": "13:26:17"} +{"current_steps": 361, "total_steps": 4048, "loss": 0.44937658309936523, "lr": 1.9917836156509472e-05, "epoch": 0.17842580007413814, "percentage": 8.92, "elapsed_time": "1:18:55", "remaining_time": "13:26:03"} +{"current_steps": 362, "total_steps": 4048, "loss": 0.42068418860435486, "lr": 1.9916787608629805e-05, "epoch": 0.17892005436797231, "percentage": 8.94, "elapsed_time": "1:19:07", "remaining_time": "13:25:43"} +{"current_steps": 363, "total_steps": 4048, 
"loss": 0.3791036605834961, "lr": 1.9915732440444428e-05, "epoch": 0.1794143086618065, "percentage": 8.97, "elapsed_time": "1:19:21", "remaining_time": "13:25:33"} +{"current_steps": 364, "total_steps": 4048, "loss": 0.401694118976593, "lr": 1.991467065265775e-05, "epoch": 0.17990856295564067, "percentage": 8.99, "elapsed_time": "1:19:33", "remaining_time": "13:25:10"} +{"current_steps": 365, "total_steps": 4048, "loss": 0.44095057249069214, "lr": 1.9913602245978602e-05, "epoch": 0.18040281724947485, "percentage": 9.02, "elapsed_time": "1:19:45", "remaining_time": "13:24:50"} +{"current_steps": 366, "total_steps": 4048, "loss": 0.435880184173584, "lr": 1.9912527221120248e-05, "epoch": 0.18089707154330903, "percentage": 9.04, "elapsed_time": "1:19:57", "remaining_time": "13:24:27"} +{"current_steps": 367, "total_steps": 4048, "loss": 0.4147350490093231, "lr": 1.991144557880035e-05, "epoch": 0.1813913258371432, "percentage": 9.07, "elapsed_time": "1:20:10", "remaining_time": "13:24:06"} +{"current_steps": 368, "total_steps": 4048, "loss": 0.4191502630710602, "lr": 1.9910357319741006e-05, "epoch": 0.1818855801309774, "percentage": 9.09, "elapsed_time": "1:20:22", "remaining_time": "13:23:44"} +{"current_steps": 369, "total_steps": 4048, "loss": 0.41988956928253174, "lr": 1.9909262444668715e-05, "epoch": 0.18237983442481157, "percentage": 9.12, "elapsed_time": "1:20:35", "remaining_time": "13:23:27"} +{"current_steps": 370, "total_steps": 4048, "loss": 0.47451251745224, "lr": 1.99081609543144e-05, "epoch": 0.18287408871864574, "percentage": 9.14, "elapsed_time": "1:20:47", "remaining_time": "13:23:04"} +{"current_steps": 371, "total_steps": 4048, "loss": 0.44665899872779846, "lr": 1.9907052849413408e-05, "epoch": 0.18336834301247992, "percentage": 9.17, "elapsed_time": "1:20:59", "remaining_time": "13:22:46"} +{"current_steps": 372, "total_steps": 4048, "loss": 0.40575331449508667, "lr": 1.990593813070548e-05, "epoch": 0.1838625973063141, "percentage": 9.19, 
"elapsed_time": "1:21:11", "remaining_time": "13:22:21"} +{"current_steps": 373, "total_steps": 4048, "loss": 0.40580621361732483, "lr": 1.99048167989348e-05, "epoch": 0.18435685160014828, "percentage": 9.21, "elapsed_time": "1:21:24", "remaining_time": "13:22:03"} +{"current_steps": 374, "total_steps": 4048, "loss": 0.461843878030777, "lr": 1.9903688854849948e-05, "epoch": 0.18485110589398246, "percentage": 9.24, "elapsed_time": "1:21:36", "remaining_time": "13:21:38"} +{"current_steps": 375, "total_steps": 4048, "loss": 0.38992881774902344, "lr": 1.990255429920392e-05, "epoch": 0.18534536018781664, "percentage": 9.26, "elapsed_time": "1:21:48", "remaining_time": "13:21:19"} +{"current_steps": 376, "total_steps": 4048, "loss": 0.4288073480129242, "lr": 1.9901413132754133e-05, "epoch": 0.18583961448165082, "percentage": 9.29, "elapsed_time": "1:22:00", "remaining_time": "13:20:55"} +{"current_steps": 377, "total_steps": 4048, "loss": 0.4376278221607208, "lr": 1.9900265356262418e-05, "epoch": 0.186333868775485, "percentage": 9.31, "elapsed_time": "1:22:13", "remaining_time": "13:20:38"} +{"current_steps": 378, "total_steps": 4048, "loss": 0.4127569794654846, "lr": 1.9899110970495e-05, "epoch": 0.18682812306931917, "percentage": 9.34, "elapsed_time": "1:22:25", "remaining_time": "13:20:14"} +{"current_steps": 379, "total_steps": 4048, "loss": 0.4221431016921997, "lr": 1.9897949976222543e-05, "epoch": 0.18732237736315335, "percentage": 9.36, "elapsed_time": "1:22:37", "remaining_time": "13:19:51"} +{"current_steps": 380, "total_steps": 4048, "loss": 0.3540682792663574, "lr": 1.9896782374220108e-05, "epoch": 0.18781663165698753, "percentage": 9.39, "elapsed_time": "1:22:50", "remaining_time": "13:19:33"} +{"current_steps": 381, "total_steps": 4048, "loss": 0.3746468424797058, "lr": 1.9895608165267165e-05, "epoch": 0.1883108859508217, "percentage": 9.41, "elapsed_time": "1:23:02", "remaining_time": "13:19:15"} +{"current_steps": 382, "total_steps": 4048, "loss": 
0.44986462593078613, "lr": 1.9894427350147602e-05, "epoch": 0.1888051402446559, "percentage": 9.44, "elapsed_time": "1:23:15", "remaining_time": "13:19:05"} +{"current_steps": 383, "total_steps": 4048, "loss": 0.38902726769447327, "lr": 1.9893239929649716e-05, "epoch": 0.18929939453849004, "percentage": 9.46, "elapsed_time": "1:23:29", "remaining_time": "13:18:52"} +{"current_steps": 384, "total_steps": 4048, "loss": 0.43202030658721924, "lr": 1.9892045904566212e-05, "epoch": 0.18979364883232422, "percentage": 9.49, "elapsed_time": "1:23:42", "remaining_time": "13:18:42"} +{"current_steps": 385, "total_steps": 4048, "loss": 0.3984760344028473, "lr": 1.9890845275694197e-05, "epoch": 0.1902879031261584, "percentage": 9.51, "elapsed_time": "1:23:55", "remaining_time": "13:18:26"} +{"current_steps": 386, "total_steps": 4048, "loss": 0.41927874088287354, "lr": 1.9889638043835203e-05, "epoch": 0.19078215741999258, "percentage": 9.54, "elapsed_time": "1:24:08", "remaining_time": "13:18:15"} +{"current_steps": 387, "total_steps": 4048, "loss": 0.3809741735458374, "lr": 1.9888424209795153e-05, "epoch": 0.19127641171382676, "percentage": 9.56, "elapsed_time": "1:24:21", "remaining_time": "13:17:59"} +{"current_steps": 388, "total_steps": 4048, "loss": 0.4237920045852661, "lr": 1.988720377438439e-05, "epoch": 0.19177066600766093, "percentage": 9.58, "elapsed_time": "1:24:34", "remaining_time": "13:17:51"} +{"current_steps": 389, "total_steps": 4048, "loss": 0.4065277576446533, "lr": 1.9885976738417662e-05, "epoch": 0.1922649203014951, "percentage": 9.61, "elapsed_time": "1:24:47", "remaining_time": "13:17:34"} +{"current_steps": 390, "total_steps": 4048, "loss": 0.41154375672340393, "lr": 1.9884743102714116e-05, "epoch": 0.1927591745953293, "percentage": 9.63, "elapsed_time": "1:25:00", "remaining_time": "13:17:18"} +{"current_steps": 391, "total_steps": 4048, "loss": 0.46544453501701355, "lr": 1.9883502868097304e-05, "epoch": 0.19325342888916347, "percentage": 9.66, 
"elapsed_time": "1:25:13", "remaining_time": "13:17:07"} +{"current_steps": 392, "total_steps": 4048, "loss": 0.41279950737953186, "lr": 1.9882256035395204e-05, "epoch": 0.19374768318299765, "percentage": 9.68, "elapsed_time": "1:25:26", "remaining_time": "13:16:51"} +{"current_steps": 393, "total_steps": 4048, "loss": 0.40083667635917664, "lr": 1.988100260544017e-05, "epoch": 0.19424193747683183, "percentage": 9.71, "elapsed_time": "1:25:39", "remaining_time": "13:16:41"} +{"current_steps": 394, "total_steps": 4048, "loss": 0.40041595697402954, "lr": 1.9879742579068976e-05, "epoch": 0.194736191770666, "percentage": 9.73, "elapsed_time": "1:25:52", "remaining_time": "13:16:25"} +{"current_steps": 395, "total_steps": 4048, "loss": 0.45317894220352173, "lr": 1.9878475957122803e-05, "epoch": 0.19523044606450018, "percentage": 9.76, "elapsed_time": "1:26:05", "remaining_time": "13:16:14"} +{"current_steps": 396, "total_steps": 4048, "loss": 0.4163329005241394, "lr": 1.987720274044723e-05, "epoch": 0.19572470035833436, "percentage": 9.78, "elapsed_time": "1:26:18", "remaining_time": "13:16:01"} +{"current_steps": 397, "total_steps": 4048, "loss": 0.4252028167247772, "lr": 1.9875922929892235e-05, "epoch": 0.19621895465216854, "percentage": 9.81, "elapsed_time": "1:26:32", "remaining_time": "13:15:52"} +{"current_steps": 398, "total_steps": 4048, "loss": 0.40558624267578125, "lr": 1.9874636526312202e-05, "epoch": 0.19671320894600272, "percentage": 9.83, "elapsed_time": "1:26:45", "remaining_time": "13:15:37"} +{"current_steps": 399, "total_steps": 4048, "loss": 0.4352114796638489, "lr": 1.9873343530565913e-05, "epoch": 0.1972074632398369, "percentage": 9.86, "elapsed_time": "1:26:58", "remaining_time": "13:15:28"} +{"current_steps": 400, "total_steps": 4048, "loss": 0.4076879024505615, "lr": 1.9872043943516556e-05, "epoch": 0.19770171753367108, "percentage": 9.88, "elapsed_time": "1:27:11", "remaining_time": "13:15:08"} +{"current_steps": 401, "total_steps": 4048, "loss": 
0.4406166672706604, "lr": 1.987073776603172e-05, "epoch": 0.19819597182750526, "percentage": 9.91, "elapsed_time": "1:27:28", "remaining_time": "13:15:38"} +{"current_steps": 402, "total_steps": 4048, "loss": 0.3974360227584839, "lr": 1.9869424998983386e-05, "epoch": 0.19869022612133944, "percentage": 9.93, "elapsed_time": "1:27:41", "remaining_time": "13:15:23"} +{"current_steps": 403, "total_steps": 4048, "loss": 0.4297831058502197, "lr": 1.9868105643247934e-05, "epoch": 0.19918448041517361, "percentage": 9.96, "elapsed_time": "1:27:54", "remaining_time": "13:15:02"} +{"current_steps": 404, "total_steps": 4048, "loss": 0.4214811623096466, "lr": 1.986677969970616e-05, "epoch": 0.1996787347090078, "percentage": 9.98, "elapsed_time": "1:28:06", "remaining_time": "13:14:46"} +{"current_steps": 405, "total_steps": 4048, "loss": 0.37227538228034973, "lr": 1.9865447169243234e-05, "epoch": 0.20017298900284197, "percentage": 10.0, "elapsed_time": "1:28:19", "remaining_time": "13:14:27"} +{"current_steps": 406, "total_steps": 4048, "loss": 0.4367320239543915, "lr": 1.986410805274874e-05, "epoch": 0.20066724329667615, "percentage": 10.03, "elapsed_time": "1:28:32", "remaining_time": "13:14:14"} +{"current_steps": 407, "total_steps": 4048, "loss": 0.4327583909034729, "lr": 1.9862762351116646e-05, "epoch": 0.20116149759051033, "percentage": 10.05, "elapsed_time": "1:28:45", "remaining_time": "13:13:57"} +{"current_steps": 408, "total_steps": 4048, "loss": 0.45309939980506897, "lr": 1.9861410065245332e-05, "epoch": 0.2016557518843445, "percentage": 10.08, "elapsed_time": "1:28:58", "remaining_time": "13:13:44"} +{"current_steps": 409, "total_steps": 4048, "loss": 0.39196106791496277, "lr": 1.986005119603756e-05, "epoch": 0.20215000617817866, "percentage": 10.1, "elapsed_time": "1:29:10", "remaining_time": "13:13:27"} +{"current_steps": 410, "total_steps": 4048, "loss": 0.4037923812866211, "lr": 1.985868574440049e-05, "epoch": 0.20264426047201284, "percentage": 10.13, 
"elapsed_time": "1:29:23", "remaining_time": "13:13:13"} +{"current_steps": 411, "total_steps": 4048, "loss": 0.41214677691459656, "lr": 1.9857313711245684e-05, "epoch": 0.20313851476584702, "percentage": 10.15, "elapsed_time": "1:29:36", "remaining_time": "13:12:56"} +{"current_steps": 412, "total_steps": 4048, "loss": 0.4265231192111969, "lr": 1.9855935097489087e-05, "epoch": 0.2036327690596812, "percentage": 10.18, "elapsed_time": "1:29:49", "remaining_time": "13:12:44"} +{"current_steps": 413, "total_steps": 4048, "loss": 0.4245712161064148, "lr": 1.9854549904051046e-05, "epoch": 0.20412702335351537, "percentage": 10.2, "elapsed_time": "1:30:02", "remaining_time": "13:12:30"} +{"current_steps": 414, "total_steps": 4048, "loss": 0.36296984553337097, "lr": 1.985315813185629e-05, "epoch": 0.20462127764734955, "percentage": 10.23, "elapsed_time": "1:30:15", "remaining_time": "13:12:16"} +{"current_steps": 415, "total_steps": 4048, "loss": 0.3982447683811188, "lr": 1.985175978183395e-05, "epoch": 0.20511553194118373, "percentage": 10.25, "elapsed_time": "1:30:27", "remaining_time": "13:11:57"} +{"current_steps": 416, "total_steps": 4048, "loss": 0.4087941646575928, "lr": 1.9850354854917543e-05, "epoch": 0.2056097862350179, "percentage": 10.28, "elapsed_time": "1:30:40", "remaining_time": "13:11:40"} +{"current_steps": 417, "total_steps": 4048, "loss": 0.4147699177265167, "lr": 1.9848943352044982e-05, "epoch": 0.2061040405288521, "percentage": 10.3, "elapsed_time": "1:30:53", "remaining_time": "13:11:25"} +{"current_steps": 418, "total_steps": 4048, "loss": 0.42588335275650024, "lr": 1.9847525274158562e-05, "epoch": 0.20659829482268627, "percentage": 10.33, "elapsed_time": "1:31:06", "remaining_time": "13:11:11"} +{"current_steps": 419, "total_steps": 4048, "loss": 0.42607247829437256, "lr": 1.9846100622204975e-05, "epoch": 0.20709254911652045, "percentage": 10.35, "elapsed_time": "1:31:18", "remaining_time": "13:10:53"} +{"current_steps": 420, "total_steps": 4048, 
"loss": 0.3600303530693054, "lr": 1.9844669397135292e-05, "epoch": 0.20758680341035463, "percentage": 10.38, "elapsed_time": "1:31:31", "remaining_time": "13:10:34"} +{"current_steps": 421, "total_steps": 4048, "loss": 0.47888651490211487, "lr": 1.9843231599904988e-05, "epoch": 0.2080810577041888, "percentage": 10.4, "elapsed_time": "1:31:44", "remaining_time": "13:10:18"} +{"current_steps": 422, "total_steps": 4048, "loss": 0.3789903521537781, "lr": 1.9841787231473906e-05, "epoch": 0.20857531199802298, "percentage": 10.42, "elapsed_time": "1:31:56", "remaining_time": "13:10:01"} +{"current_steps": 423, "total_steps": 4048, "loss": 0.3682858943939209, "lr": 1.9840336292806292e-05, "epoch": 0.20906956629185716, "percentage": 10.45, "elapsed_time": "1:32:09", "remaining_time": "13:09:46"} +{"current_steps": 424, "total_steps": 4048, "loss": 0.42071375250816345, "lr": 1.9838878784870772e-05, "epoch": 0.20956382058569134, "percentage": 10.47, "elapsed_time": "1:32:21", "remaining_time": "13:09:27"} +{"current_steps": 425, "total_steps": 4048, "loss": 0.4258945882320404, "lr": 1.9837414708640353e-05, "epoch": 0.21005807487952552, "percentage": 10.5, "elapsed_time": "1:32:34", "remaining_time": "13:09:14"} +{"current_steps": 426, "total_steps": 4048, "loss": 0.42377644777297974, "lr": 1.9835944065092433e-05, "epoch": 0.2105523291733597, "percentage": 10.52, "elapsed_time": "1:32:47", "remaining_time": "13:08:55"} +{"current_steps": 427, "total_steps": 4048, "loss": 0.35860198736190796, "lr": 1.9834466855208795e-05, "epoch": 0.21104658346719388, "percentage": 10.55, "elapsed_time": "1:33:00", "remaining_time": "13:08:39"} +{"current_steps": 428, "total_steps": 4048, "loss": 0.3498537242412567, "lr": 1.9832983079975606e-05, "epoch": 0.21154083776102806, "percentage": 10.57, "elapsed_time": "1:33:12", "remaining_time": "13:08:18"} +{"current_steps": 429, "total_steps": 4048, "loss": 0.3779754042625427, "lr": 1.9831492740383405e-05, "epoch": 0.21203509205486223, 
"percentage": 10.6, "elapsed_time": "1:33:24", "remaining_time": "13:08:01"} +{"current_steps": 430, "total_steps": 4048, "loss": 0.3574570119380951, "lr": 1.9829995837427124e-05, "epoch": 0.2125293463486964, "percentage": 10.62, "elapsed_time": "1:33:36", "remaining_time": "13:07:40"} +{"current_steps": 431, "total_steps": 4048, "loss": 0.40678369998931885, "lr": 1.982849237210608e-05, "epoch": 0.2130236006425306, "percentage": 10.65, "elapsed_time": "1:33:49", "remaining_time": "13:07:19"} +{"current_steps": 432, "total_steps": 4048, "loss": 0.4392494261264801, "lr": 1.9826982345423955e-05, "epoch": 0.21351785493636477, "percentage": 10.67, "elapsed_time": "1:34:01", "remaining_time": "13:07:03"} +{"current_steps": 433, "total_steps": 4048, "loss": 0.3858703374862671, "lr": 1.982546575838883e-05, "epoch": 0.21401210923019895, "percentage": 10.7, "elapsed_time": "1:34:13", "remaining_time": "13:06:41"} +{"current_steps": 434, "total_steps": 4048, "loss": 0.4427873492240906, "lr": 1.9823942612013153e-05, "epoch": 0.21450636352403313, "percentage": 10.72, "elapsed_time": "1:34:26", "remaining_time": "13:06:24"} +{"current_steps": 435, "total_steps": 4048, "loss": 0.40610629320144653, "lr": 1.9822412907313756e-05, "epoch": 0.21500061781786728, "percentage": 10.75, "elapsed_time": "1:34:38", "remaining_time": "13:06:07"} +{"current_steps": 436, "total_steps": 4048, "loss": 0.4181024432182312, "lr": 1.9820876645311847e-05, "epoch": 0.21549487211170146, "percentage": 10.77, "elapsed_time": "1:34:51", "remaining_time": "13:05:51"} +{"current_steps": 437, "total_steps": 4048, "loss": 0.39591747522354126, "lr": 1.981933382703301e-05, "epoch": 0.21598912640553564, "percentage": 10.8, "elapsed_time": "1:35:04", "remaining_time": "13:05:33"} +{"current_steps": 438, "total_steps": 4048, "loss": 0.4326947033405304, "lr": 1.9817784453507215e-05, "epoch": 0.21648338069936982, "percentage": 10.82, "elapsed_time": "1:35:16", "remaining_time": "13:05:15"} +{"current_steps": 439, 
"total_steps": 4048, "loss": 0.42645522952079773, "lr": 1.98162285257688e-05, "epoch": 0.216977634993204, "percentage": 10.84, "elapsed_time": "1:35:28", "remaining_time": "13:04:53"} +{"current_steps": 440, "total_steps": 4048, "loss": 0.37372538447380066, "lr": 1.9814666044856472e-05, "epoch": 0.21747188928703817, "percentage": 10.87, "elapsed_time": "1:35:41", "remaining_time": "13:04:36"} +{"current_steps": 441, "total_steps": 4048, "loss": 0.44066423177719116, "lr": 1.9813097011813328e-05, "epoch": 0.21796614358087235, "percentage": 10.89, "elapsed_time": "1:35:52", "remaining_time": "13:04:14"} +{"current_steps": 442, "total_steps": 4048, "loss": 0.39892369508743286, "lr": 1.9811521427686833e-05, "epoch": 0.21846039787470653, "percentage": 10.92, "elapsed_time": "1:36:05", "remaining_time": "13:03:53"} +{"current_steps": 443, "total_steps": 4048, "loss": 0.43497514724731445, "lr": 1.980993929352882e-05, "epoch": 0.2189546521685407, "percentage": 10.94, "elapsed_time": "1:36:17", "remaining_time": "13:03:36"} +{"current_steps": 444, "total_steps": 4048, "loss": 0.3810148239135742, "lr": 1.9808350610395504e-05, "epoch": 0.2194489064623749, "percentage": 10.97, "elapsed_time": "1:36:29", "remaining_time": "13:03:15"} +{"current_steps": 445, "total_steps": 4048, "loss": 0.3952462673187256, "lr": 1.9806755379347465e-05, "epoch": 0.21994316075620907, "percentage": 10.99, "elapsed_time": "1:36:42", "remaining_time": "13:03:02"} +{"current_steps": 446, "total_steps": 4048, "loss": 0.39168232679367065, "lr": 1.9805153601449655e-05, "epoch": 0.22043741505004324, "percentage": 11.02, "elapsed_time": "1:36:55", "remaining_time": "13:02:43"} +{"current_steps": 447, "total_steps": 4048, "loss": 0.38572901487350464, "lr": 1.98035452777714e-05, "epoch": 0.22093166934387742, "percentage": 11.04, "elapsed_time": "1:37:07", "remaining_time": "13:02:26"} +{"current_steps": 448, "total_steps": 4048, "loss": 0.40514758229255676, "lr": 1.980193040938639e-05, "epoch": 
0.2214259236377116, "percentage": 11.07, "elapsed_time": "1:37:19", "remaining_time": "13:02:06"} +{"current_steps": 449, "total_steps": 4048, "loss": 0.4289678931236267, "lr": 1.9800308997372696e-05, "epoch": 0.22192017793154578, "percentage": 11.09, "elapsed_time": "1:37:32", "remaining_time": "13:01:51"} +{"current_steps": 450, "total_steps": 4048, "loss": 0.4082314670085907, "lr": 1.979868104281274e-05, "epoch": 0.22241443222537996, "percentage": 11.12, "elapsed_time": "1:37:44", "remaining_time": "13:01:32"} +{"current_steps": 451, "total_steps": 4048, "loss": 0.3819827735424042, "lr": 1.979704654679333e-05, "epoch": 0.22290868651921414, "percentage": 11.14, "elapsed_time": "1:37:57", "remaining_time": "13:01:17"} +{"current_steps": 452, "total_steps": 4048, "loss": 0.42063748836517334, "lr": 1.979540551040563e-05, "epoch": 0.22340294081304832, "percentage": 11.17, "elapsed_time": "1:38:09", "remaining_time": "13:00:57"} +{"current_steps": 453, "total_steps": 4048, "loss": 0.41634586453437805, "lr": 1.9793757934745166e-05, "epoch": 0.2238971951068825, "percentage": 11.19, "elapsed_time": "1:38:22", "remaining_time": "13:00:41"} +{"current_steps": 454, "total_steps": 4048, "loss": 0.4151400625705719, "lr": 1.979210382091184e-05, "epoch": 0.22439144940071667, "percentage": 11.22, "elapsed_time": "1:38:35", "remaining_time": "13:00:25"} +{"current_steps": 455, "total_steps": 4048, "loss": 0.40609729290008545, "lr": 1.9790443170009918e-05, "epoch": 0.22488570369455085, "percentage": 11.24, "elapsed_time": "1:38:48", "remaining_time": "13:00:11"} +{"current_steps": 456, "total_steps": 4048, "loss": 0.38967129588127136, "lr": 1.9788775983148022e-05, "epoch": 0.22537995798838503, "percentage": 11.26, "elapsed_time": "1:39:00", "remaining_time": "12:59:57"} +{"current_steps": 457, "total_steps": 4048, "loss": 0.3833470940589905, "lr": 1.978710226143915e-05, "epoch": 0.2258742122822192, "percentage": 11.29, "elapsed_time": "1:39:14", "remaining_time": "12:59:49"} 
+{"current_steps": 458, "total_steps": 4048, "loss": 0.42918887734413147, "lr": 1.978542200600064e-05, "epoch": 0.2263684665760534, "percentage": 11.31, "elapsed_time": "1:39:27", "remaining_time": "12:59:33"} +{"current_steps": 459, "total_steps": 4048, "loss": 0.3793666660785675, "lr": 1.978373521795422e-05, "epoch": 0.22686272086988757, "percentage": 11.34, "elapsed_time": "1:39:39", "remaining_time": "12:59:17"} +{"current_steps": 460, "total_steps": 4048, "loss": 0.3885256350040436, "lr": 1.978204189842596e-05, "epoch": 0.22735697516372175, "percentage": 11.36, "elapsed_time": "1:39:52", "remaining_time": "12:59:00"} +{"current_steps": 461, "total_steps": 4048, "loss": 0.4003330171108246, "lr": 1.97803420485463e-05, "epoch": 0.22785122945755593, "percentage": 11.39, "elapsed_time": "1:40:04", "remaining_time": "12:58:42"} +{"current_steps": 462, "total_steps": 4048, "loss": 0.4050712585449219, "lr": 1.9778635669450026e-05, "epoch": 0.22834548375139008, "percentage": 11.41, "elapsed_time": "1:40:17", "remaining_time": "12:58:24"} +{"current_steps": 463, "total_steps": 4048, "loss": 0.4003967046737671, "lr": 1.9776922762276304e-05, "epoch": 0.22883973804522426, "percentage": 11.44, "elapsed_time": "1:40:29", "remaining_time": "12:58:06"} +{"current_steps": 464, "total_steps": 4048, "loss": 0.4506968855857849, "lr": 1.9775203328168643e-05, "epoch": 0.22933399233905843, "percentage": 11.46, "elapsed_time": "1:40:42", "remaining_time": "12:57:50"} +{"current_steps": 465, "total_steps": 4048, "loss": 0.3947281241416931, "lr": 1.9773477368274906e-05, "epoch": 0.2298282466328926, "percentage": 11.49, "elapsed_time": "1:40:54", "remaining_time": "12:57:32"} +{"current_steps": 466, "total_steps": 4048, "loss": 0.4166758954524994, "lr": 1.9771744883747326e-05, "epoch": 0.2303225009267268, "percentage": 11.51, "elapsed_time": "1:41:07", "remaining_time": "12:57:18"} +{"current_steps": 467, "total_steps": 4048, "loss": 0.40400344133377075, "lr": 1.9770005875742484e-05, 
"epoch": 0.23081675522056097, "percentage": 11.54, "elapsed_time": "1:41:19", "remaining_time": "12:56:59"} +{"current_steps": 468, "total_steps": 4048, "loss": 0.4143296480178833, "lr": 1.9768260345421312e-05, "epoch": 0.23131100951439515, "percentage": 11.56, "elapsed_time": "1:41:32", "remaining_time": "12:56:44"} +{"current_steps": 469, "total_steps": 4048, "loss": 0.39128193259239197, "lr": 1.976650829394911e-05, "epoch": 0.23180526380822933, "percentage": 11.59, "elapsed_time": "1:41:44", "remaining_time": "12:56:27"} +{"current_steps": 470, "total_steps": 4048, "loss": 0.4305758476257324, "lr": 1.9764749722495514e-05, "epoch": 0.2322995181020635, "percentage": 11.61, "elapsed_time": "1:41:57", "remaining_time": "12:56:12"} +{"current_steps": 471, "total_steps": 4048, "loss": 0.41711747646331787, "lr": 1.9762984632234523e-05, "epoch": 0.23279377239589769, "percentage": 11.64, "elapsed_time": "1:42:10", "remaining_time": "12:55:54"} +{"current_steps": 472, "total_steps": 4048, "loss": 0.43328845500946045, "lr": 1.976121302434449e-05, "epoch": 0.23328802668973186, "percentage": 11.66, "elapsed_time": "1:42:22", "remaining_time": "12:55:35"} +{"current_steps": 473, "total_steps": 4048, "loss": 0.38707420229911804, "lr": 1.975943490000811e-05, "epoch": 0.23378228098356604, "percentage": 11.68, "elapsed_time": "1:42:34", "remaining_time": "12:55:19"} +{"current_steps": 474, "total_steps": 4048, "loss": 0.390054851770401, "lr": 1.9757650260412438e-05, "epoch": 0.23427653527740022, "percentage": 11.71, "elapsed_time": "1:42:47", "remaining_time": "12:55:02"} +{"current_steps": 475, "total_steps": 4048, "loss": 0.45697346329689026, "lr": 1.9755859106748875e-05, "epoch": 0.2347707895712344, "percentage": 11.73, "elapsed_time": "1:43:00", "remaining_time": "12:54:47"} +{"current_steps": 476, "total_steps": 4048, "loss": 0.4381307363510132, "lr": 1.9754061440213165e-05, "epoch": 0.23526504386506858, "percentage": 11.76, "elapsed_time": "1:43:12", "remaining_time": 
"12:54:28"} +{"current_steps": 477, "total_steps": 4048, "loss": 0.4217841625213623, "lr": 1.9752257262005403e-05, "epoch": 0.23575929815890276, "percentage": 11.78, "elapsed_time": "1:43:25", "remaining_time": "12:54:18"} +{"current_steps": 478, "total_steps": 4048, "loss": 0.35968005657196045, "lr": 1.9750446573330038e-05, "epoch": 0.23625355245273694, "percentage": 11.81, "elapsed_time": "1:43:38", "remaining_time": "12:54:00"} +{"current_steps": 479, "total_steps": 4048, "loss": 0.3516439199447632, "lr": 1.9748629375395856e-05, "epoch": 0.23674780674657112, "percentage": 11.83, "elapsed_time": "1:43:50", "remaining_time": "12:53:45"} +{"current_steps": 480, "total_steps": 4048, "loss": 0.4078671634197235, "lr": 1.9746805669415995e-05, "epoch": 0.2372420610404053, "percentage": 11.86, "elapsed_time": "1:44:03", "remaining_time": "12:53:27"} +{"current_steps": 481, "total_steps": 4048, "loss": 0.39654213190078735, "lr": 1.9744975456607936e-05, "epoch": 0.23773631533423947, "percentage": 11.88, "elapsed_time": "1:44:16", "remaining_time": "12:53:13"} +{"current_steps": 482, "total_steps": 4048, "loss": 0.41271698474884033, "lr": 1.9743138738193498e-05, "epoch": 0.23823056962807365, "percentage": 11.91, "elapsed_time": "1:44:28", "remaining_time": "12:52:56"} +{"current_steps": 483, "total_steps": 4048, "loss": 0.3957251310348511, "lr": 1.974129551539885e-05, "epoch": 0.23872482392190783, "percentage": 11.93, "elapsed_time": "1:44:41", "remaining_time": "12:52:40"} +{"current_steps": 484, "total_steps": 4048, "loss": 0.39857393503189087, "lr": 1.9739445789454506e-05, "epoch": 0.239219078215742, "percentage": 11.96, "elapsed_time": "1:44:53", "remaining_time": "12:52:22"} +{"current_steps": 485, "total_steps": 4048, "loss": 0.4263526499271393, "lr": 1.973758956159531e-05, "epoch": 0.2397133325095762, "percentage": 11.98, "elapsed_time": "1:45:06", "remaining_time": "12:52:07"} +{"current_steps": 486, "total_steps": 4048, "loss": 0.3849489688873291, "lr": 
1.9735726833060457e-05, "epoch": 0.24020758680341037, "percentage": 12.01, "elapsed_time": "1:45:18", "remaining_time": "12:51:50"} +{"current_steps": 487, "total_steps": 4048, "loss": 0.431019127368927, "lr": 1.9733857605093476e-05, "epoch": 0.24070184109724455, "percentage": 12.03, "elapsed_time": "1:45:31", "remaining_time": "12:51:35"} +{"current_steps": 488, "total_steps": 4048, "loss": 0.3740619421005249, "lr": 1.973198187894224e-05, "epoch": 0.2411960953910787, "percentage": 12.06, "elapsed_time": "1:45:43", "remaining_time": "12:51:18"} +{"current_steps": 489, "total_steps": 4048, "loss": 0.361680269241333, "lr": 1.9730099655858953e-05, "epoch": 0.24169034968491288, "percentage": 12.08, "elapsed_time": "1:45:56", "remaining_time": "12:51:03"} +{"current_steps": 490, "total_steps": 4048, "loss": 0.41683071851730347, "lr": 1.9728210937100162e-05, "epoch": 0.24218460397874705, "percentage": 12.1, "elapsed_time": "1:46:08", "remaining_time": "12:50:45"} +{"current_steps": 491, "total_steps": 4048, "loss": 0.3898739516735077, "lr": 1.9726315723926746e-05, "epoch": 0.24267885827258123, "percentage": 12.13, "elapsed_time": "1:46:21", "remaining_time": "12:50:28"} +{"current_steps": 492, "total_steps": 4048, "loss": 0.39339032769203186, "lr": 1.9724414017603925e-05, "epoch": 0.2431731125664154, "percentage": 12.15, "elapsed_time": "1:46:34", "remaining_time": "12:50:14"} +{"current_steps": 493, "total_steps": 4048, "loss": 0.401676744222641, "lr": 1.9722505819401255e-05, "epoch": 0.2436673668602496, "percentage": 12.18, "elapsed_time": "1:46:46", "remaining_time": "12:49:56"} +{"current_steps": 494, "total_steps": 4048, "loss": 0.3814789056777954, "lr": 1.9720591130592613e-05, "epoch": 0.24416162115408377, "percentage": 12.2, "elapsed_time": "1:46:59", "remaining_time": "12:49:41"} +{"current_steps": 495, "total_steps": 4048, "loss": 0.3980346918106079, "lr": 1.9718669952456226e-05, "epoch": 0.24465587544791795, "percentage": 12.23, "elapsed_time": "1:47:11", 
"remaining_time": "12:49:24"} +{"current_steps": 496, "total_steps": 4048, "loss": 0.4222795069217682, "lr": 1.971674228627464e-05, "epoch": 0.24515012974175213, "percentage": 12.25, "elapsed_time": "1:47:24", "remaining_time": "12:49:09"} +{"current_steps": 497, "total_steps": 4048, "loss": 0.3795197904109955, "lr": 1.971480813333474e-05, "epoch": 0.2456443840355863, "percentage": 12.28, "elapsed_time": "1:47:36", "remaining_time": "12:48:52"} +{"current_steps": 498, "total_steps": 4048, "loss": 0.3746161460876465, "lr": 1.971286749492774e-05, "epoch": 0.24613863832942048, "percentage": 12.3, "elapsed_time": "1:47:49", "remaining_time": "12:48:38"} +{"current_steps": 499, "total_steps": 4048, "loss": 0.3552350699901581, "lr": 1.9710920372349174e-05, "epoch": 0.24663289262325466, "percentage": 12.33, "elapsed_time": "1:48:01", "remaining_time": "12:48:21"} +{"current_steps": 500, "total_steps": 4048, "loss": 0.39690741896629333, "lr": 1.9708966766898925e-05, "epoch": 0.24712714691708884, "percentage": 12.35, "elapsed_time": "1:48:15", "remaining_time": "12:48:13"} +{"current_steps": 501, "total_steps": 4048, "loss": 0.39530014991760254, "lr": 1.9707006679881186e-05, "epoch": 0.24762140121092302, "percentage": 12.38, "elapsed_time": "1:48:33", "remaining_time": "12:48:36"} +{"current_steps": 502, "total_steps": 4048, "loss": 0.41228705644607544, "lr": 1.9705040112604483e-05, "epoch": 0.2481156555047572, "percentage": 12.4, "elapsed_time": "1:48:45", "remaining_time": "12:48:17"} +{"current_steps": 503, "total_steps": 4048, "loss": 0.4330476224422455, "lr": 1.9703067066381668e-05, "epoch": 0.24860990979859138, "percentage": 12.43, "elapsed_time": "1:48:58", "remaining_time": "12:48:00"} +{"current_steps": 504, "total_steps": 4048, "loss": 0.38365668058395386, "lr": 1.970108754252992e-05, "epoch": 0.24910416409242556, "percentage": 12.45, "elapsed_time": "1:49:10", "remaining_time": "12:47:40"} +{"current_steps": 505, "total_steps": 4048, "loss": 0.4419581890106201, 
"lr": 1.969910154237074e-05, "epoch": 0.24959841838625973, "percentage": 12.48, "elapsed_time": "1:49:22", "remaining_time": "12:47:24"} +{"current_steps": 506, "total_steps": 4048, "loss": 0.38741230964660645, "lr": 1.9697109067229957e-05, "epoch": 0.2500926726800939, "percentage": 12.5, "elapsed_time": "1:49:34", "remaining_time": "12:47:04"} +{"current_steps": 507, "total_steps": 4048, "loss": 0.41751983761787415, "lr": 1.969511011843771e-05, "epoch": 0.2505869269739281, "percentage": 12.52, "elapsed_time": "1:49:47", "remaining_time": "12:46:48"} +{"current_steps": 508, "total_steps": 4048, "loss": 0.40355241298675537, "lr": 1.9693104697328477e-05, "epoch": 0.25108118126776224, "percentage": 12.55, "elapsed_time": "1:49:59", "remaining_time": "12:46:30"} +{"current_steps": 509, "total_steps": 4048, "loss": 0.3511045575141907, "lr": 1.9691092805241046e-05, "epoch": 0.25157543556159645, "percentage": 12.57, "elapsed_time": "1:50:12", "remaining_time": "12:46:14"} +{"current_steps": 510, "total_steps": 4048, "loss": 0.38917112350463867, "lr": 1.9689074443518526e-05, "epoch": 0.2520696898554306, "percentage": 12.6, "elapsed_time": "1:50:24", "remaining_time": "12:45:54"} +{"current_steps": 511, "total_steps": 4048, "loss": 0.40256473422050476, "lr": 1.968704961350835e-05, "epoch": 0.2525639441492648, "percentage": 12.62, "elapsed_time": "1:50:37", "remaining_time": "12:45:43"} +{"current_steps": 512, "total_steps": 4048, "loss": 0.32350897789001465, "lr": 1.968501831656226e-05, "epoch": 0.25305819844309896, "percentage": 12.65, "elapsed_time": "1:50:50", "remaining_time": "12:45:26"} +{"current_steps": 513, "total_steps": 4048, "loss": 0.36787012219429016, "lr": 1.9682980554036322e-05, "epoch": 0.25355245273693316, "percentage": 12.67, "elapsed_time": "1:51:02", "remaining_time": "12:45:09"} +{"current_steps": 514, "total_steps": 4048, "loss": 0.4035605490207672, "lr": 1.9680936327290924e-05, "epoch": 0.2540467070307673, "percentage": 12.7, "elapsed_time": 
"1:51:14", "remaining_time": "12:44:53"} +{"current_steps": 515, "total_steps": 4048, "loss": 0.39402660727500916, "lr": 1.9678885637690755e-05, "epoch": 0.2545409613246015, "percentage": 12.72, "elapsed_time": "1:51:27", "remaining_time": "12:44:35"} +{"current_steps": 516, "total_steps": 4048, "loss": 0.37553271651268005, "lr": 1.967682848660483e-05, "epoch": 0.2550352156184357, "percentage": 12.75, "elapsed_time": "1:51:40", "remaining_time": "12:44:23"} +{"current_steps": 517, "total_steps": 4048, "loss": 0.40148675441741943, "lr": 1.9674764875406472e-05, "epoch": 0.2555294699122699, "percentage": 12.77, "elapsed_time": "1:51:52", "remaining_time": "12:44:06"} +{"current_steps": 518, "total_steps": 4048, "loss": 0.45255252718925476, "lr": 1.967269480547332e-05, "epoch": 0.25602372420610403, "percentage": 12.8, "elapsed_time": "1:52:05", "remaining_time": "12:43:52"} +{"current_steps": 519, "total_steps": 4048, "loss": 0.4183574616909027, "lr": 1.9670618278187318e-05, "epoch": 0.25651797849993824, "percentage": 12.82, "elapsed_time": "1:52:18", "remaining_time": "12:43:37"} +{"current_steps": 520, "total_steps": 4048, "loss": 0.3950796127319336, "lr": 1.9668535294934733e-05, "epoch": 0.2570122327937724, "percentage": 12.85, "elapsed_time": "1:52:31", "remaining_time": "12:43:26"} +{"current_steps": 521, "total_steps": 4048, "loss": 0.4062424898147583, "lr": 1.9666445857106132e-05, "epoch": 0.2575064870876066, "percentage": 12.87, "elapsed_time": "1:52:44", "remaining_time": "12:43:10"} +{"current_steps": 522, "total_steps": 4048, "loss": 0.4095906913280487, "lr": 1.966434996609639e-05, "epoch": 0.25800074138144075, "percentage": 12.9, "elapsed_time": "1:52:56", "remaining_time": "12:42:55"} +{"current_steps": 523, "total_steps": 4048, "loss": 0.42302393913269043, "lr": 1.96622476233047e-05, "epoch": 0.25849499567527495, "percentage": 12.92, "elapsed_time": "1:53:09", "remaining_time": "12:42:38"} +{"current_steps": 524, "total_steps": 4048, "loss": 
0.43204039335250854, "lr": 1.966013883013455e-05, "epoch": 0.2589892499691091, "percentage": 12.94, "elapsed_time": "1:53:22", "remaining_time": "12:42:26"} +{"current_steps": 525, "total_steps": 4048, "loss": 0.39941906929016113, "lr": 1.9658023587993748e-05, "epoch": 0.2594835042629433, "percentage": 12.97, "elapsed_time": "1:53:34", "remaining_time": "12:42:11"} +{"current_steps": 526, "total_steps": 4048, "loss": 0.37053728103637695, "lr": 1.9655901898294397e-05, "epoch": 0.25997775855677746, "percentage": 12.99, "elapsed_time": "1:53:48", "remaining_time": "12:42:00"} +{"current_steps": 527, "total_steps": 4048, "loss": 0.4126317501068115, "lr": 1.96537737624529e-05, "epoch": 0.26047201285061167, "percentage": 13.02, "elapsed_time": "1:54:01", "remaining_time": "12:41:48"} +{"current_steps": 528, "total_steps": 4048, "loss": 0.42397794127464294, "lr": 1.9651639181889975e-05, "epoch": 0.2609662671444458, "percentage": 13.04, "elapsed_time": "1:54:14", "remaining_time": "12:41:39"} +{"current_steps": 529, "total_steps": 4048, "loss": 0.3606872260570526, "lr": 1.964949815803064e-05, "epoch": 0.26146052143827997, "percentage": 13.07, "elapsed_time": "1:54:27", "remaining_time": "12:41:26"} +{"current_steps": 530, "total_steps": 4048, "loss": 0.420923113822937, "lr": 1.9647350692304206e-05, "epoch": 0.2619547757321142, "percentage": 13.09, "elapsed_time": "1:54:40", "remaining_time": "12:41:13"} +{"current_steps": 531, "total_steps": 4048, "loss": 0.41700440645217896, "lr": 1.9645196786144298e-05, "epoch": 0.2624490300259483, "percentage": 13.12, "elapsed_time": "1:54:54", "remaining_time": "12:41:05"} +{"current_steps": 532, "total_steps": 4048, "loss": 0.3961814045906067, "lr": 1.9643036440988825e-05, "epoch": 0.26294328431978253, "percentage": 13.14, "elapsed_time": "1:55:07", "remaining_time": "12:40:51"} +{"current_steps": 533, "total_steps": 4048, "loss": 0.4025250971317291, "lr": 1.9640869658280005e-05, "epoch": 0.2634375386136167, "percentage": 13.17, 
"elapsed_time": "1:55:20", "remaining_time": "12:40:36"} +{"current_steps": 534, "total_steps": 4048, "loss": 0.38828611373901367, "lr": 1.9638696439464357e-05, "epoch": 0.2639317929074509, "percentage": 13.19, "elapsed_time": "1:55:32", "remaining_time": "12:40:19"} +{"current_steps": 535, "total_steps": 4048, "loss": 0.3109109401702881, "lr": 1.963651678599268e-05, "epoch": 0.26442604720128504, "percentage": 13.22, "elapsed_time": "1:55:45", "remaining_time": "12:40:04"} +{"current_steps": 536, "total_steps": 4048, "loss": 0.41516438126564026, "lr": 1.963433069932009e-05, "epoch": 0.26492030149511925, "percentage": 13.24, "elapsed_time": "1:55:57", "remaining_time": "12:39:48"} +{"current_steps": 537, "total_steps": 4048, "loss": 0.3765295743942261, "lr": 1.9632138180905982e-05, "epoch": 0.2654145557889534, "percentage": 13.27, "elapsed_time": "1:56:10", "remaining_time": "12:39:35"} +{"current_steps": 538, "total_steps": 4048, "loss": 0.37631309032440186, "lr": 1.9629939232214052e-05, "epoch": 0.2659088100827876, "percentage": 13.29, "elapsed_time": "1:56:23", "remaining_time": "12:39:19"} +{"current_steps": 539, "total_steps": 4048, "loss": 0.3640018403530121, "lr": 1.9627733854712286e-05, "epoch": 0.26640306437662176, "percentage": 13.32, "elapsed_time": "1:56:35", "remaining_time": "12:39:04"} +{"current_steps": 540, "total_steps": 4048, "loss": 0.3971521854400635, "lr": 1.9625522049872962e-05, "epoch": 0.26689731867045596, "percentage": 13.34, "elapsed_time": "1:56:47", "remaining_time": "12:38:45"} +{"current_steps": 541, "total_steps": 4048, "loss": 0.4218612313270569, "lr": 1.962330381917265e-05, "epoch": 0.2673915729642901, "percentage": 13.36, "elapsed_time": "1:57:00", "remaining_time": "12:38:30"} +{"current_steps": 542, "total_steps": 4048, "loss": 0.38814622163772583, "lr": 1.9621079164092203e-05, "epoch": 0.2678858272581243, "percentage": 13.39, "elapsed_time": "1:57:13", "remaining_time": "12:38:16"} +{"current_steps": 543, "total_steps": 4048, 
"loss": 0.3912709355354309, "lr": 1.961884808611678e-05, "epoch": 0.26838008155195847, "percentage": 13.41, "elapsed_time": "1:57:25", "remaining_time": "12:38:00"} +{"current_steps": 544, "total_steps": 4048, "loss": 0.4007106423377991, "lr": 1.9616610586735808e-05, "epoch": 0.2688743358457927, "percentage": 13.44, "elapsed_time": "1:57:39", "remaining_time": "12:37:50"} +{"current_steps": 545, "total_steps": 4048, "loss": 0.37406057119369507, "lr": 1.9614366667443016e-05, "epoch": 0.26936859013962683, "percentage": 13.46, "elapsed_time": "1:57:52", "remaining_time": "12:37:38"} +{"current_steps": 546, "total_steps": 4048, "loss": 0.4187811613082886, "lr": 1.961211632973641e-05, "epoch": 0.26986284443346104, "percentage": 13.49, "elapsed_time": "1:58:05", "remaining_time": "12:37:27"} +{"current_steps": 547, "total_steps": 4048, "loss": 0.44418057799339294, "lr": 1.960985957511828e-05, "epoch": 0.2703570987272952, "percentage": 13.51, "elapsed_time": "1:58:18", "remaining_time": "12:37:13"} +{"current_steps": 548, "total_steps": 4048, "loss": 0.41016438603401184, "lr": 1.9607596405095205e-05, "epoch": 0.2708513530211294, "percentage": 13.54, "elapsed_time": "1:58:32", "remaining_time": "12:37:04"} +{"current_steps": 549, "total_steps": 4048, "loss": 0.39461439847946167, "lr": 1.9605326821178047e-05, "epoch": 0.27134560731496354, "percentage": 13.56, "elapsed_time": "1:58:44", "remaining_time": "12:36:47"} +{"current_steps": 550, "total_steps": 4048, "loss": 0.4159786105155945, "lr": 1.960305082488195e-05, "epoch": 0.27183986160879775, "percentage": 13.59, "elapsed_time": "1:58:57", "remaining_time": "12:36:32"} +{"current_steps": 551, "total_steps": 4048, "loss": 0.3702941834926605, "lr": 1.960076841772633e-05, "epoch": 0.2723341159026319, "percentage": 13.61, "elapsed_time": "1:59:09", "remaining_time": "12:36:18"} +{"current_steps": 552, "total_steps": 4048, "loss": 0.3482900559902191, "lr": 1.9598479601234894e-05, "epoch": 0.2728283701964661, "percentage": 
13.64, "elapsed_time": "1:59:22", "remaining_time": "12:36:05"} +{"current_steps": 553, "total_steps": 4048, "loss": 0.40550655126571655, "lr": 1.9596184376935618e-05, "epoch": 0.27332262449030026, "percentage": 13.66, "elapsed_time": "1:59:35", "remaining_time": "12:35:51"} +{"current_steps": 554, "total_steps": 4048, "loss": 0.38604867458343506, "lr": 1.9593882746360767e-05, "epoch": 0.2738168787841344, "percentage": 13.69, "elapsed_time": "1:59:49", "remaining_time": "12:35:42"} +{"current_steps": 555, "total_steps": 4048, "loss": 0.36586758494377136, "lr": 1.9591574711046876e-05, "epoch": 0.2743111330779686, "percentage": 13.71, "elapsed_time": "2:00:02", "remaining_time": "12:35:27"} +{"current_steps": 556, "total_steps": 4048, "loss": 0.37780559062957764, "lr": 1.958926027253475e-05, "epoch": 0.27480538737180277, "percentage": 13.74, "elapsed_time": "2:00:15", "remaining_time": "12:35:18"} +{"current_steps": 557, "total_steps": 4048, "loss": 0.3837544322013855, "lr": 1.9586939432369486e-05, "epoch": 0.275299641665637, "percentage": 13.76, "elapsed_time": "2:00:28", "remaining_time": "12:35:05"} +{"current_steps": 558, "total_steps": 4048, "loss": 0.39425861835479736, "lr": 1.9584612192100433e-05, "epoch": 0.2757938959594711, "percentage": 13.78, "elapsed_time": "2:00:42", "remaining_time": "12:34:55"} +{"current_steps": 559, "total_steps": 4048, "loss": 0.4008832275867462, "lr": 1.958227855328123e-05, "epoch": 0.27628815025330533, "percentage": 13.81, "elapsed_time": "2:00:55", "remaining_time": "12:34:43"} +{"current_steps": 560, "total_steps": 4048, "loss": 0.42411595582962036, "lr": 1.957993851746978e-05, "epoch": 0.2767824045471395, "percentage": 13.83, "elapsed_time": "2:01:08", "remaining_time": "12:34:33"} +{"current_steps": 561, "total_steps": 4048, "loss": 0.4028055965900421, "lr": 1.9577592086228257e-05, "epoch": 0.2772766588409737, "percentage": 13.86, "elapsed_time": "2:01:21", "remaining_time": "12:34:18"} +{"current_steps": 562, "total_steps": 
4048, "loss": 0.3785157799720764, "lr": 1.9575239261123102e-05, "epoch": 0.27777091313480784, "percentage": 13.88, "elapsed_time": "2:01:34", "remaining_time": "12:34:04"} +{"current_steps": 563, "total_steps": 4048, "loss": 0.3726264536380768, "lr": 1.9572880043725032e-05, "epoch": 0.27826516742864205, "percentage": 13.91, "elapsed_time": "2:01:47", "remaining_time": "12:33:54"} +{"current_steps": 564, "total_steps": 4048, "loss": 0.37261486053466797, "lr": 1.957051443560902e-05, "epoch": 0.2787594217224762, "percentage": 13.93, "elapsed_time": "2:02:00", "remaining_time": "12:33:40"} +{"current_steps": 565, "total_steps": 4048, "loss": 0.34781068563461304, "lr": 1.956814243835432e-05, "epoch": 0.2792536760163104, "percentage": 13.96, "elapsed_time": "2:02:14", "remaining_time": "12:33:31"} +{"current_steps": 566, "total_steps": 4048, "loss": 0.3828197121620178, "lr": 1.956576405354444e-05, "epoch": 0.27974793031014455, "percentage": 13.98, "elapsed_time": "2:02:26", "remaining_time": "12:33:17"} +{"current_steps": 567, "total_steps": 4048, "loss": 0.3839726150035858, "lr": 1.9563379282767156e-05, "epoch": 0.28024218460397876, "percentage": 14.01, "elapsed_time": "2:02:40", "remaining_time": "12:33:08"} +{"current_steps": 568, "total_steps": 4048, "loss": 0.3658025562763214, "lr": 1.9560988127614507e-05, "epoch": 0.2807364388978129, "percentage": 14.03, "elapsed_time": "2:02:53", "remaining_time": "12:32:54"} +{"current_steps": 569, "total_steps": 4048, "loss": 0.400045782327652, "lr": 1.9558590589682795e-05, "epoch": 0.2812306931916471, "percentage": 14.06, "elapsed_time": "2:03:06", "remaining_time": "12:32:44"} +{"current_steps": 570, "total_steps": 4048, "loss": 0.36586880683898926, "lr": 1.955618667057258e-05, "epoch": 0.28172494748548127, "percentage": 14.08, "elapsed_time": "2:03:19", "remaining_time": "12:32:32"} +{"current_steps": 571, "total_steps": 4048, "loss": 0.3886389136314392, "lr": 1.9553776371888684e-05, "epoch": 0.2822192017793155, "percentage": 
14.11, "elapsed_time": "2:03:33", "remaining_time": "12:32:22"} +{"current_steps": 572, "total_steps": 4048, "loss": 0.37858110666275024, "lr": 1.955135969524019e-05, "epoch": 0.2827134560731496, "percentage": 14.13, "elapsed_time": "2:03:46", "remaining_time": "12:32:08"} +{"current_steps": 573, "total_steps": 4048, "loss": 0.3264877498149872, "lr": 1.9548936642240435e-05, "epoch": 0.28320771036698383, "percentage": 14.16, "elapsed_time": "2:03:58", "remaining_time": "12:31:53"} +{"current_steps": 574, "total_steps": 4048, "loss": 0.3756924569606781, "lr": 1.9546507214507017e-05, "epoch": 0.283701964660818, "percentage": 14.18, "elapsed_time": "2:04:11", "remaining_time": "12:31:40"} +{"current_steps": 575, "total_steps": 4048, "loss": 0.3773806691169739, "lr": 1.9544071413661783e-05, "epoch": 0.2841962189546522, "percentage": 14.2, "elapsed_time": "2:04:24", "remaining_time": "12:31:24"} +{"current_steps": 576, "total_steps": 4048, "loss": 0.37437382340431213, "lr": 1.9541629241330842e-05, "epoch": 0.28469047324848634, "percentage": 14.23, "elapsed_time": "2:04:37", "remaining_time": "12:31:12"} +{"current_steps": 577, "total_steps": 4048, "loss": 0.3835929036140442, "lr": 1.9539180699144552e-05, "epoch": 0.28518472754232055, "percentage": 14.25, "elapsed_time": "2:04:50", "remaining_time": "12:30:58"} +{"current_steps": 578, "total_steps": 4048, "loss": 0.39163681864738464, "lr": 1.9536725788737528e-05, "epoch": 0.2856789818361547, "percentage": 14.28, "elapsed_time": "2:05:03", "remaining_time": "12:30:46"} +{"current_steps": 579, "total_steps": 4048, "loss": 0.39241698384284973, "lr": 1.953426451174863e-05, "epoch": 0.2861732361299889, "percentage": 14.3, "elapsed_time": "2:05:15", "remaining_time": "12:30:30"} +{"current_steps": 580, "total_steps": 4048, "loss": 0.32731348276138306, "lr": 1.953179686982097e-05, "epoch": 0.28666749042382306, "percentage": 14.33, "elapsed_time": "2:05:29", "remaining_time": "12:30:20"} +{"current_steps": 581, "total_steps": 
4048, "loss": 0.34735041856765747, "lr": 1.9529322864601915e-05, "epoch": 0.2871617447176572, "percentage": 14.35, "elapsed_time": "2:05:42", "remaining_time": "12:30:07"} +{"current_steps": 582, "total_steps": 4048, "loss": 0.3795308470726013, "lr": 1.952684249774307e-05, "epoch": 0.2876559990114914, "percentage": 14.38, "elapsed_time": "2:05:55", "remaining_time": "12:29:54"} +{"current_steps": 583, "total_steps": 4048, "loss": 0.3546086549758911, "lr": 1.95243557709003e-05, "epoch": 0.28815025330532557, "percentage": 14.4, "elapsed_time": "2:06:07", "remaining_time": "12:29:37"} +{"current_steps": 584, "total_steps": 4048, "loss": 0.35397839546203613, "lr": 1.9521862685733703e-05, "epoch": 0.28864450759915977, "percentage": 14.43, "elapsed_time": "2:06:20", "remaining_time": "12:29:21"} +{"current_steps": 585, "total_steps": 4048, "loss": 0.350521981716156, "lr": 1.9519363243907627e-05, "epoch": 0.2891387618929939, "percentage": 14.45, "elapsed_time": "2:06:33", "remaining_time": "12:29:09"} +{"current_steps": 586, "total_steps": 4048, "loss": 0.380625456571579, "lr": 1.9516857447090663e-05, "epoch": 0.28963301618682813, "percentage": 14.48, "elapsed_time": "2:06:46", "remaining_time": "12:28:56"} +{"current_steps": 587, "total_steps": 4048, "loss": 0.40378236770629883, "lr": 1.9514345296955647e-05, "epoch": 0.2901272704806623, "percentage": 14.5, "elapsed_time": "2:06:59", "remaining_time": "12:28:43"} +{"current_steps": 588, "total_steps": 4048, "loss": 0.4050450325012207, "lr": 1.9511826795179653e-05, "epoch": 0.2906215247744965, "percentage": 14.53, "elapsed_time": "2:07:11", "remaining_time": "12:28:28"} +{"current_steps": 589, "total_steps": 4048, "loss": 0.3772329092025757, "lr": 1.9509301943444e-05, "epoch": 0.29111577906833064, "percentage": 14.55, "elapsed_time": "2:07:24", "remaining_time": "12:28:16"} +{"current_steps": 590, "total_steps": 4048, "loss": 0.4079870581626892, "lr": 1.9506770743434244e-05, "epoch": 0.29161003336216484, "percentage": 
14.58, "elapsed_time": "2:07:37", "remaining_time": "12:28:01"} +{"current_steps": 591, "total_steps": 4048, "loss": 0.4233503043651581, "lr": 1.950423319684017e-05, "epoch": 0.292104287655999, "percentage": 14.6, "elapsed_time": "2:07:50", "remaining_time": "12:27:48"} +{"current_steps": 592, "total_steps": 4048, "loss": 0.395530104637146, "lr": 1.9501689305355814e-05, "epoch": 0.2925985419498332, "percentage": 14.62, "elapsed_time": "2:08:03", "remaining_time": "12:27:34"} +{"current_steps": 593, "total_steps": 4048, "loss": 0.4266175925731659, "lr": 1.949913907067944e-05, "epoch": 0.29309279624366735, "percentage": 14.65, "elapsed_time": "2:08:16", "remaining_time": "12:27:21"} +{"current_steps": 594, "total_steps": 4048, "loss": 0.4428660571575165, "lr": 1.949658249451355e-05, "epoch": 0.29358705053750156, "percentage": 14.67, "elapsed_time": "2:08:28", "remaining_time": "12:27:06"} +{"current_steps": 595, "total_steps": 4048, "loss": 0.36831945180892944, "lr": 1.9494019578564874e-05, "epoch": 0.2940813048313357, "percentage": 14.7, "elapsed_time": "2:08:42", "remaining_time": "12:26:54"} +{"current_steps": 596, "total_steps": 4048, "loss": 0.392259806394577, "lr": 1.949145032454438e-05, "epoch": 0.2945755591251699, "percentage": 14.72, "elapsed_time": "2:08:54", "remaining_time": "12:26:39"} +{"current_steps": 597, "total_steps": 4048, "loss": 0.43743032217025757, "lr": 1.948887473416727e-05, "epoch": 0.29506981341900407, "percentage": 14.75, "elapsed_time": "2:09:07", "remaining_time": "12:26:27"} +{"current_steps": 598, "total_steps": 4048, "loss": 0.3725258409976959, "lr": 1.9486292809152965e-05, "epoch": 0.2955640677128383, "percentage": 14.77, "elapsed_time": "2:09:20", "remaining_time": "12:26:12"} +{"current_steps": 599, "total_steps": 4048, "loss": 0.39507436752319336, "lr": 1.948370455122512e-05, "epoch": 0.2960583220066724, "percentage": 14.8, "elapsed_time": "2:09:33", "remaining_time": "12:26:01"} +{"current_steps": 600, "total_steps": 4048, 
"loss": 0.40915870666503906, "lr": 1.9481109962111623e-05, "epoch": 0.29655257630050663, "percentage": 14.82, "elapsed_time": "2:09:46", "remaining_time": "12:25:46"} +{"current_steps": 601, "total_steps": 4048, "loss": 0.3465006351470947, "lr": 1.947850904354459e-05, "epoch": 0.2970468305943408, "percentage": 14.85, "elapsed_time": "2:10:05", "remaining_time": "12:26:09"} +{"current_steps": 602, "total_steps": 4048, "loss": 0.4205567538738251, "lr": 1.9475901797260346e-05, "epoch": 0.297541084888175, "percentage": 14.87, "elapsed_time": "2:10:18", "remaining_time": "12:25:52"} +{"current_steps": 603, "total_steps": 4048, "loss": 0.37682560086250305, "lr": 1.9473288224999455e-05, "epoch": 0.29803533918200914, "percentage": 14.9, "elapsed_time": "2:10:30", "remaining_time": "12:25:34"} +{"current_steps": 604, "total_steps": 4048, "loss": 0.3865458369255066, "lr": 1.9470668328506705e-05, "epoch": 0.29852959347584335, "percentage": 14.92, "elapsed_time": "2:10:42", "remaining_time": "12:25:20"} +{"current_steps": 605, "total_steps": 4048, "loss": 0.36366063356399536, "lr": 1.9468042109531096e-05, "epoch": 0.2990238477696775, "percentage": 14.95, "elapsed_time": "2:10:55", "remaining_time": "12:25:02"} +{"current_steps": 606, "total_steps": 4048, "loss": 0.3861471116542816, "lr": 1.9465409569825857e-05, "epoch": 0.2995181020635117, "percentage": 14.97, "elapsed_time": "2:11:07", "remaining_time": "12:24:46"} +{"current_steps": 607, "total_steps": 4048, "loss": 0.3499199151992798, "lr": 1.9462770711148433e-05, "epoch": 0.30001235635734586, "percentage": 15.0, "elapsed_time": "2:11:19", "remaining_time": "12:24:27"} +{"current_steps": 608, "total_steps": 4048, "loss": 0.3704417943954468, "lr": 1.946012553526049e-05, "epoch": 0.30050661065118, "percentage": 15.02, "elapsed_time": "2:11:32", "remaining_time": "12:24:13"} +{"current_steps": 609, "total_steps": 4048, "loss": 0.41278937458992004, "lr": 1.9457474043927908e-05, "epoch": 0.3010008649450142, "percentage": 15.04, 
"elapsed_time": "2:11:44", "remaining_time": "12:23:55"} +{"current_steps": 610, "total_steps": 4048, "loss": 0.36078256368637085, "lr": 1.9454816238920787e-05, "epoch": 0.30149511923884836, "percentage": 15.07, "elapsed_time": "2:11:56", "remaining_time": "12:23:40"} +{"current_steps": 611, "total_steps": 4048, "loss": 0.3713051676750183, "lr": 1.9452152122013434e-05, "epoch": 0.30198937353268257, "percentage": 15.09, "elapsed_time": "2:12:08", "remaining_time": "12:23:20"} +{"current_steps": 612, "total_steps": 4048, "loss": 0.3919684886932373, "lr": 1.9449481694984382e-05, "epoch": 0.3024836278265167, "percentage": 15.12, "elapsed_time": "2:12:21", "remaining_time": "12:23:06"} +{"current_steps": 613, "total_steps": 4048, "loss": 0.4249044358730316, "lr": 1.9446804959616364e-05, "epoch": 0.3029778821203509, "percentage": 15.14, "elapsed_time": "2:12:33", "remaining_time": "12:22:48"} +{"current_steps": 614, "total_steps": 4048, "loss": 0.4033172130584717, "lr": 1.9444121917696335e-05, "epoch": 0.3034721364141851, "percentage": 15.17, "elapsed_time": "2:12:45", "remaining_time": "12:22:32"} +{"current_steps": 615, "total_steps": 4048, "loss": 0.35740789771080017, "lr": 1.9441432571015455e-05, "epoch": 0.3039663907080193, "percentage": 15.19, "elapsed_time": "2:12:58", "remaining_time": "12:22:18"} +{"current_steps": 616, "total_steps": 4048, "loss": 0.41219189763069153, "lr": 1.9438736921369093e-05, "epoch": 0.30446064500185344, "percentage": 15.22, "elapsed_time": "2:13:11", "remaining_time": "12:22:02"} +{"current_steps": 617, "total_steps": 4048, "loss": 0.3751283884048462, "lr": 1.9436034970556824e-05, "epoch": 0.30495489929568764, "percentage": 15.24, "elapsed_time": "2:13:23", "remaining_time": "12:21:47"} +{"current_steps": 618, "total_steps": 4048, "loss": 0.40294593572616577, "lr": 1.9433326720382433e-05, "epoch": 0.3054491535895218, "percentage": 15.27, "elapsed_time": "2:13:36", "remaining_time": "12:21:30"} +{"current_steps": 619, "total_steps": 4048, 
"loss": 0.4163772463798523, "lr": 1.943061217265391e-05, "epoch": 0.305943407883356, "percentage": 15.29, "elapsed_time": "2:13:48", "remaining_time": "12:21:16"} +{"current_steps": 620, "total_steps": 4048, "loss": 0.3796529769897461, "lr": 1.9427891329183444e-05, "epoch": 0.30643766217719015, "percentage": 15.32, "elapsed_time": "2:14:01", "remaining_time": "12:20:59"} +{"current_steps": 621, "total_steps": 4048, "loss": 0.44097092747688293, "lr": 1.942516419178744e-05, "epoch": 0.30693191647102436, "percentage": 15.34, "elapsed_time": "2:14:14", "remaining_time": "12:20:46"} +{"current_steps": 622, "total_steps": 4048, "loss": 0.384232759475708, "lr": 1.942243076228649e-05, "epoch": 0.3074261707648585, "percentage": 15.37, "elapsed_time": "2:14:26", "remaining_time": "12:20:30"} +{"current_steps": 623, "total_steps": 4048, "loss": 0.3734084367752075, "lr": 1.941969104250539e-05, "epoch": 0.3079204250586927, "percentage": 15.39, "elapsed_time": "2:14:39", "remaining_time": "12:20:17"} +{"current_steps": 624, "total_steps": 4048, "loss": 0.3532239496707916, "lr": 1.9416945034273142e-05, "epoch": 0.30841467935252687, "percentage": 15.42, "elapsed_time": "2:14:51", "remaining_time": "12:20:01"} +{"current_steps": 625, "total_steps": 4048, "loss": 0.39430537819862366, "lr": 1.941419273942294e-05, "epoch": 0.3089089336463611, "percentage": 15.44, "elapsed_time": "2:15:04", "remaining_time": "12:19:44"} +{"current_steps": 626, "total_steps": 4048, "loss": 0.35790857672691345, "lr": 1.941143415979218e-05, "epoch": 0.3094031879401952, "percentage": 15.46, "elapsed_time": "2:15:16", "remaining_time": "12:19:29"} +{"current_steps": 627, "total_steps": 4048, "loss": 0.3684060871601105, "lr": 1.9408669297222446e-05, "epoch": 0.30989744223402943, "percentage": 15.49, "elapsed_time": "2:15:29", "remaining_time": "12:19:14"} +{"current_steps": 628, "total_steps": 4048, "loss": 0.3425355553627014, "lr": 1.9405898153559522e-05, "epoch": 0.3103916965278636, "percentage": 15.51, 
"elapsed_time": "2:15:42", "remaining_time": "12:19:03"} +{"current_steps": 629, "total_steps": 4048, "loss": 0.3295109272003174, "lr": 1.9403120730653387e-05, "epoch": 0.3108859508216978, "percentage": 15.54, "elapsed_time": "2:15:55", "remaining_time": "12:18:47"} +{"current_steps": 630, "total_steps": 4048, "loss": 0.37015989422798157, "lr": 1.940033703035821e-05, "epoch": 0.31138020511553194, "percentage": 15.56, "elapsed_time": "2:16:07", "remaining_time": "12:18:33"} +{"current_steps": 631, "total_steps": 4048, "loss": 0.40625980496406555, "lr": 1.939754705453234e-05, "epoch": 0.31187445940936614, "percentage": 15.59, "elapsed_time": "2:16:20", "remaining_time": "12:18:19"} +{"current_steps": 632, "total_steps": 4048, "loss": 0.42503830790519714, "lr": 1.939475080503833e-05, "epoch": 0.3123687137032003, "percentage": 15.61, "elapsed_time": "2:16:33", "remaining_time": "12:18:07"} +{"current_steps": 633, "total_steps": 4048, "loss": 0.36230289936065674, "lr": 1.939194828374292e-05, "epoch": 0.31286296799703445, "percentage": 15.64, "elapsed_time": "2:16:46", "remaining_time": "12:17:55"} +{"current_steps": 634, "total_steps": 4048, "loss": 0.4128720164299011, "lr": 1.938913949251703e-05, "epoch": 0.31335722229086865, "percentage": 15.66, "elapsed_time": "2:17:00", "remaining_time": "12:17:45"} +{"current_steps": 635, "total_steps": 4048, "loss": 0.39706575870513916, "lr": 1.938632443323577e-05, "epoch": 0.3138514765847028, "percentage": 15.69, "elapsed_time": "2:17:13", "remaining_time": "12:17:32"} +{"current_steps": 636, "total_steps": 4048, "loss": 0.38395214080810547, "lr": 1.9383503107778434e-05, "epoch": 0.314345730878537, "percentage": 15.71, "elapsed_time": "2:17:26", "remaining_time": "12:17:22"} +{"current_steps": 637, "total_steps": 4048, "loss": 0.3629944324493408, "lr": 1.9380675518028495e-05, "epoch": 0.31483998517237116, "percentage": 15.74, "elapsed_time": "2:17:40", "remaining_time": "12:17:10"} +{"current_steps": 638, "total_steps": 4048, 
"loss": 0.39474761486053467, "lr": 1.937784166587361e-05, "epoch": 0.31533423946620537, "percentage": 15.76, "elapsed_time": "2:17:52", "remaining_time": "12:16:56"} +{"current_steps": 639, "total_steps": 4048, "loss": 0.423098087310791, "lr": 1.9375001553205627e-05, "epoch": 0.3158284937600395, "percentage": 15.79, "elapsed_time": "2:18:05", "remaining_time": "12:16:39"} +{"current_steps": 640, "total_steps": 4048, "loss": 0.3453904986381531, "lr": 1.937215518192056e-05, "epoch": 0.3163227480538737, "percentage": 15.81, "elapsed_time": "2:18:17", "remaining_time": "12:16:25"} +{"current_steps": 641, "total_steps": 4048, "loss": 0.3659127354621887, "lr": 1.9369302553918605e-05, "epoch": 0.3168170023477079, "percentage": 15.83, "elapsed_time": "2:18:30", "remaining_time": "12:16:09"} +{"current_steps": 642, "total_steps": 4048, "loss": 0.3613426089286804, "lr": 1.9366443671104132e-05, "epoch": 0.3173112566415421, "percentage": 15.86, "elapsed_time": "2:18:43", "remaining_time": "12:15:56"} +{"current_steps": 643, "total_steps": 4048, "loss": 0.3556531071662903, "lr": 1.93635785353857e-05, "epoch": 0.31780551093537623, "percentage": 15.88, "elapsed_time": "2:18:55", "remaining_time": "12:15:41"} +{"current_steps": 644, "total_steps": 4048, "loss": 0.3515596091747284, "lr": 1.9360707148676022e-05, "epoch": 0.31829976522921044, "percentage": 15.91, "elapsed_time": "2:19:08", "remaining_time": "12:15:25"} +{"current_steps": 645, "total_steps": 4048, "loss": 0.36270469427108765, "lr": 1.9357829512892e-05, "epoch": 0.3187940195230446, "percentage": 15.93, "elapsed_time": "2:19:20", "remaining_time": "12:15:10"} +{"current_steps": 646, "total_steps": 4048, "loss": 0.3617076277732849, "lr": 1.9354945629954706e-05, "epoch": 0.3192882738168788, "percentage": 15.96, "elapsed_time": "2:19:32", "remaining_time": "12:14:53"} +{"current_steps": 647, "total_steps": 4048, "loss": 0.3888331949710846, "lr": 1.9352055501789376e-05, "epoch": 0.31978252811071295, "percentage": 15.98, 
"elapsed_time": "2:19:45", "remaining_time": "12:14:37"} +{"current_steps": 648, "total_steps": 4048, "loss": 0.41199982166290283, "lr": 1.9349159130325413e-05, "epoch": 0.32027678240454716, "percentage": 16.01, "elapsed_time": "2:19:57", "remaining_time": "12:14:19"} +{"current_steps": 649, "total_steps": 4048, "loss": 0.3878370225429535, "lr": 1.93462565174964e-05, "epoch": 0.3207710366983813, "percentage": 16.03, "elapsed_time": "2:20:09", "remaining_time": "12:14:04"} +{"current_steps": 650, "total_steps": 4048, "loss": 0.380184531211853, "lr": 1.9343347665240077e-05, "epoch": 0.3212652909922155, "percentage": 16.06, "elapsed_time": "2:20:22", "remaining_time": "12:13:49"} +{"current_steps": 651, "total_steps": 4048, "loss": 0.3746795356273651, "lr": 1.9340432575498355e-05, "epoch": 0.32175954528604966, "percentage": 16.08, "elapsed_time": "2:20:34", "remaining_time": "12:13:33"} +{"current_steps": 652, "total_steps": 4048, "loss": 0.3700905442237854, "lr": 1.93375112502173e-05, "epoch": 0.32225379957988387, "percentage": 16.11, "elapsed_time": "2:20:47", "remaining_time": "12:13:17"} +{"current_steps": 653, "total_steps": 4048, "loss": 0.3331850469112396, "lr": 1.9334583691347153e-05, "epoch": 0.322748053873718, "percentage": 16.13, "elapsed_time": "2:20:59", "remaining_time": "12:13:02"} +{"current_steps": 654, "total_steps": 4048, "loss": 0.3397464156150818, "lr": 1.933164990084231e-05, "epoch": 0.32324230816755223, "percentage": 16.16, "elapsed_time": "2:21:12", "remaining_time": "12:12:46"} +{"current_steps": 655, "total_steps": 4048, "loss": 0.3837242126464844, "lr": 1.9328709880661326e-05, "epoch": 0.3237365624613864, "percentage": 16.18, "elapsed_time": "2:21:24", "remaining_time": "12:12:28"} +{"current_steps": 656, "total_steps": 4048, "loss": 0.38854193687438965, "lr": 1.9325763632766916e-05, "epoch": 0.3242308167552206, "percentage": 16.21, "elapsed_time": "2:21:36", "remaining_time": "12:12:13"} +{"current_steps": 657, "total_steps": 4048, "loss": 
0.41792556643486023, "lr": 1.9322811159125955e-05, "epoch": 0.32472507104905474, "percentage": 16.23, "elapsed_time": "2:21:48", "remaining_time": "12:11:55"} +{"current_steps": 658, "total_steps": 4048, "loss": 0.3968243896961212, "lr": 1.931985246170947e-05, "epoch": 0.32521932534288894, "percentage": 16.25, "elapsed_time": "2:22:01", "remaining_time": "12:11:40"} +{"current_steps": 659, "total_steps": 4048, "loss": 0.41183531284332275, "lr": 1.9316887542492645e-05, "epoch": 0.3257135796367231, "percentage": 16.28, "elapsed_time": "2:22:13", "remaining_time": "12:11:23"} +{"current_steps": 660, "total_steps": 4048, "loss": 0.36057350039482117, "lr": 1.931391640345482e-05, "epoch": 0.32620783393055724, "percentage": 16.3, "elapsed_time": "2:22:25", "remaining_time": "12:11:09"} +{"current_steps": 661, "total_steps": 4048, "loss": 0.36032363772392273, "lr": 1.9310939046579482e-05, "epoch": 0.32670208822439145, "percentage": 16.33, "elapsed_time": "2:22:38", "remaining_time": "12:10:52"} +{"current_steps": 662, "total_steps": 4048, "loss": 0.3682931363582611, "lr": 1.9307955473854275e-05, "epoch": 0.3271963425182256, "percentage": 16.35, "elapsed_time": "2:22:51", "remaining_time": "12:10:39"} +{"current_steps": 663, "total_steps": 4048, "loss": 0.3829198181629181, "lr": 1.9304965687270987e-05, "epoch": 0.3276905968120598, "percentage": 16.38, "elapsed_time": "2:23:03", "remaining_time": "12:10:23"} +{"current_steps": 664, "total_steps": 4048, "loss": 0.3901137709617615, "lr": 1.930196968882556e-05, "epoch": 0.32818485110589396, "percentage": 16.4, "elapsed_time": "2:23:15", "remaining_time": "12:10:07"} +{"current_steps": 665, "total_steps": 4048, "loss": 0.34352344274520874, "lr": 1.9298967480518077e-05, "epoch": 0.32867910539972817, "percentage": 16.43, "elapsed_time": "2:23:28", "remaining_time": "12:09:51"} +{"current_steps": 666, "total_steps": 4048, "loss": 0.38822662830352783, "lr": 1.9295959064352767e-05, "epoch": 0.3291733596935623, "percentage": 16.45, 
"elapsed_time": "2:23:40", "remaining_time": "12:09:37"} +{"current_steps": 667, "total_steps": 4048, "loss": 0.3639586567878723, "lr": 1.9292944442338013e-05, "epoch": 0.3296676139873965, "percentage": 16.48, "elapsed_time": "2:23:52", "remaining_time": "12:09:20"} +{"current_steps": 668, "total_steps": 4048, "loss": 0.38537997007369995, "lr": 1.9289923616486326e-05, "epoch": 0.3301618682812307, "percentage": 16.5, "elapsed_time": "2:24:05", "remaining_time": "12:09:06"} +{"current_steps": 669, "total_steps": 4048, "loss": 0.3514263331890106, "lr": 1.9286896588814373e-05, "epoch": 0.3306561225750649, "percentage": 16.53, "elapsed_time": "2:24:17", "remaining_time": "12:08:49"} +{"current_steps": 670, "total_steps": 4048, "loss": 0.3873803913593292, "lr": 1.928386336134295e-05, "epoch": 0.33115037686889903, "percentage": 16.55, "elapsed_time": "2:24:30", "remaining_time": "12:08:34"} +{"current_steps": 671, "total_steps": 4048, "loss": 0.36644282937049866, "lr": 1.9280823936096994e-05, "epoch": 0.33164463116273324, "percentage": 16.58, "elapsed_time": "2:24:42", "remaining_time": "12:08:18"} +{"current_steps": 672, "total_steps": 4048, "loss": 0.34837427735328674, "lr": 1.9277778315105587e-05, "epoch": 0.3321388854565674, "percentage": 16.6, "elapsed_time": "2:24:55", "remaining_time": "12:08:03"} +{"current_steps": 673, "total_steps": 4048, "loss": 0.3879021406173706, "lr": 1.927472650040194e-05, "epoch": 0.3326331397504016, "percentage": 16.63, "elapsed_time": "2:25:07", "remaining_time": "12:07:46"} +{"current_steps": 674, "total_steps": 4048, "loss": 0.3753926753997803, "lr": 1.9271668494023404e-05, "epoch": 0.33312739404423575, "percentage": 16.65, "elapsed_time": "2:25:19", "remaining_time": "12:07:30"} +{"current_steps": 675, "total_steps": 4048, "loss": 0.35362815856933594, "lr": 1.9268604298011454e-05, "epoch": 0.33362164833806995, "percentage": 16.67, "elapsed_time": "2:25:32", "remaining_time": "12:07:16"} +{"current_steps": 676, "total_steps": 4048, 
"loss": 0.3685564696788788, "lr": 1.926553391441171e-05, "epoch": 0.3341159026319041, "percentage": 16.7, "elapsed_time": "2:25:45", "remaining_time": "12:07:01"} +{"current_steps": 677, "total_steps": 4048, "loss": 0.42326927185058594, "lr": 1.926245734527391e-05, "epoch": 0.3346101569257383, "percentage": 16.72, "elapsed_time": "2:25:58", "remaining_time": "12:06:49"} +{"current_steps": 678, "total_steps": 4048, "loss": 0.35918861627578735, "lr": 1.925937459265193e-05, "epoch": 0.33510441121957246, "percentage": 16.75, "elapsed_time": "2:26:10", "remaining_time": "12:06:35"} +{"current_steps": 679, "total_steps": 4048, "loss": 0.38703471422195435, "lr": 1.9256285658603773e-05, "epoch": 0.33559866551340667, "percentage": 16.77, "elapsed_time": "2:26:23", "remaining_time": "12:06:22"} +{"current_steps": 680, "total_steps": 4048, "loss": 0.3993009924888611, "lr": 1.9253190545191567e-05, "epoch": 0.3360929198072408, "percentage": 16.8, "elapsed_time": "2:26:36", "remaining_time": "12:06:06"} +{"current_steps": 681, "total_steps": 4048, "loss": 0.3998498320579529, "lr": 1.9250089254481566e-05, "epoch": 0.336587174101075, "percentage": 16.82, "elapsed_time": "2:26:48", "remaining_time": "12:05:52"} +{"current_steps": 682, "total_steps": 4048, "loss": 0.37211501598358154, "lr": 1.9246981788544145e-05, "epoch": 0.3370814283949092, "percentage": 16.85, "elapsed_time": "2:27:01", "remaining_time": "12:05:36"} +{"current_steps": 683, "total_steps": 4048, "loss": 0.37204745411872864, "lr": 1.9243868149453806e-05, "epoch": 0.3375756826887434, "percentage": 16.87, "elapsed_time": "2:27:13", "remaining_time": "12:05:21"} +{"current_steps": 684, "total_steps": 4048, "loss": 0.3784663677215576, "lr": 1.924074833928917e-05, "epoch": 0.33806993698257753, "percentage": 16.9, "elapsed_time": "2:27:26", "remaining_time": "12:05:06"} +{"current_steps": 685, "total_steps": 4048, "loss": 0.4140951633453369, "lr": 1.9237622360132975e-05, "epoch": 0.33856419127641174, "percentage": 16.92, 
"elapsed_time": "2:27:38", "remaining_time": "12:04:50"} +{"current_steps": 686, "total_steps": 4048, "loss": 0.3723721504211426, "lr": 1.9234490214072083e-05, "epoch": 0.3390584455702459, "percentage": 16.95, "elapsed_time": "2:27:52", "remaining_time": "12:04:40"} +{"current_steps": 687, "total_steps": 4048, "loss": 0.3714251220226288, "lr": 1.923135190319747e-05, "epoch": 0.33955269986408004, "percentage": 16.97, "elapsed_time": "2:28:04", "remaining_time": "12:04:25"} +{"current_steps": 688, "total_steps": 4048, "loss": 0.3551461696624756, "lr": 1.9228207429604224e-05, "epoch": 0.34004695415791425, "percentage": 17.0, "elapsed_time": "2:28:17", "remaining_time": "12:04:14"} +{"current_steps": 689, "total_steps": 4048, "loss": 0.3543378412723541, "lr": 1.9225056795391554e-05, "epoch": 0.3405412084517484, "percentage": 17.02, "elapsed_time": "2:28:30", "remaining_time": "12:04:00"} +{"current_steps": 690, "total_steps": 4048, "loss": 0.3405894935131073, "lr": 1.922190000266278e-05, "epoch": 0.3410354627455826, "percentage": 17.05, "elapsed_time": "2:28:43", "remaining_time": "12:03:47"} +{"current_steps": 691, "total_steps": 4048, "loss": 0.36478808522224426, "lr": 1.9218737053525324e-05, "epoch": 0.34152971703941676, "percentage": 17.07, "elapsed_time": "2:28:56", "remaining_time": "12:03:33"} +{"current_steps": 692, "total_steps": 4048, "loss": 0.39778709411621094, "lr": 1.9215567950090734e-05, "epoch": 0.34202397133325096, "percentage": 17.09, "elapsed_time": "2:29:09", "remaining_time": "12:03:22"} +{"current_steps": 693, "total_steps": 4048, "loss": 0.3553788661956787, "lr": 1.9212392694474654e-05, "epoch": 0.3425182256270851, "percentage": 17.12, "elapsed_time": "2:29:22", "remaining_time": "12:03:09"} +{"current_steps": 694, "total_steps": 4048, "loss": 0.3393115997314453, "lr": 1.920921128879684e-05, "epoch": 0.3430124799209193, "percentage": 17.14, "elapsed_time": "2:29:36", "remaining_time": "12:03:02"} +{"current_steps": 695, "total_steps": 4048, 
"loss": 0.4240456819534302, "lr": 1.9206023735181154e-05, "epoch": 0.3435067342147535, "percentage": 17.17, "elapsed_time": "2:29:49", "remaining_time": "12:02:48"} +{"current_steps": 696, "total_steps": 4048, "loss": 0.3451164960861206, "lr": 1.920283003575556e-05, "epoch": 0.3440009885085877, "percentage": 17.19, "elapsed_time": "2:30:02", "remaining_time": "12:02:36"} +{"current_steps": 697, "total_steps": 4048, "loss": 0.4328063726425171, "lr": 1.919963019265213e-05, "epoch": 0.34449524280242183, "percentage": 17.22, "elapsed_time": "2:30:16", "remaining_time": "12:02:27"} +{"current_steps": 698, "total_steps": 4048, "loss": 0.35965877771377563, "lr": 1.9196424208007026e-05, "epoch": 0.34498949709625604, "percentage": 17.24, "elapsed_time": "2:30:29", "remaining_time": "12:02:14"} +{"current_steps": 699, "total_steps": 4048, "loss": 0.40995267033576965, "lr": 1.9193212083960522e-05, "epoch": 0.3454837513900902, "percentage": 17.27, "elapsed_time": "2:30:42", "remaining_time": "12:02:03"} +{"current_steps": 700, "total_steps": 4048, "loss": 0.373586505651474, "lr": 1.9189993822656984e-05, "epoch": 0.3459780056839244, "percentage": 17.29, "elapsed_time": "2:30:54", "remaining_time": "12:01:48"} +{"current_steps": 701, "total_steps": 4048, "loss": 0.3651657998561859, "lr": 1.918676942624488e-05, "epoch": 0.34647225997775855, "percentage": 17.32, "elapsed_time": "2:31:15", "remaining_time": "12:02:10"} +{"current_steps": 702, "total_steps": 4048, "loss": 0.3333090543746948, "lr": 1.918353889687677e-05, "epoch": 0.34696651427159275, "percentage": 17.34, "elapsed_time": "2:31:28", "remaining_time": "12:01:57"} +{"current_steps": 703, "total_steps": 4048, "loss": 0.444000780582428, "lr": 1.9180302236709312e-05, "epoch": 0.3474607685654269, "percentage": 17.37, "elapsed_time": "2:31:41", "remaining_time": "12:01:46"} +{"current_steps": 704, "total_steps": 4048, "loss": 0.34942537546157837, "lr": 1.917705944790325e-05, "epoch": 0.3479550228592611, "percentage": 17.39, 
"elapsed_time": "2:31:54", "remaining_time": "12:01:31"} +{"current_steps": 705, "total_steps": 4048, "loss": 0.4709789752960205, "lr": 1.9173810532623425e-05, "epoch": 0.34844927715309526, "percentage": 17.42, "elapsed_time": "2:32:07", "remaining_time": "12:01:21"} +{"current_steps": 706, "total_steps": 4048, "loss": 0.3615723252296448, "lr": 1.917055549303877e-05, "epoch": 0.34894353144692947, "percentage": 17.44, "elapsed_time": "2:32:20", "remaining_time": "12:01:07"} +{"current_steps": 707, "total_steps": 4048, "loss": 0.366035133600235, "lr": 1.9167294331322293e-05, "epoch": 0.3494377857407636, "percentage": 17.47, "elapsed_time": "2:32:33", "remaining_time": "12:00:57"} +{"current_steps": 708, "total_steps": 4048, "loss": 0.3916548490524292, "lr": 1.9164027049651105e-05, "epoch": 0.3499320400345978, "percentage": 17.49, "elapsed_time": "2:32:46", "remaining_time": "12:00:42"} +{"current_steps": 709, "total_steps": 4048, "loss": 0.3752925992012024, "lr": 1.91607536502064e-05, "epoch": 0.350426294328432, "percentage": 17.51, "elapsed_time": "2:32:58", "remaining_time": "12:00:26"} +{"current_steps": 710, "total_steps": 4048, "loss": 0.3471261262893677, "lr": 1.9157474135173448e-05, "epoch": 0.3509205486222662, "percentage": 17.54, "elapsed_time": "2:33:11", "remaining_time": "12:00:13"} +{"current_steps": 711, "total_steps": 4048, "loss": 0.36898115277290344, "lr": 1.9154188506741605e-05, "epoch": 0.35141480291610033, "percentage": 17.56, "elapsed_time": "2:33:24", "remaining_time": "11:59:59"} +{"current_steps": 712, "total_steps": 4048, "loss": 0.38236287236213684, "lr": 1.9150896767104315e-05, "epoch": 0.3519090572099345, "percentage": 17.59, "elapsed_time": "2:33:37", "remaining_time": "11:59:47"} +{"current_steps": 713, "total_steps": 4048, "loss": 0.39260241389274597, "lr": 1.9147598918459096e-05, "epoch": 0.3524033115037687, "percentage": 17.61, "elapsed_time": "2:33:50", "remaining_time": "11:59:33"} +{"current_steps": 714, "total_steps": 4048, 
"loss": 0.3699083626270294, "lr": 1.9144294963007542e-05, "epoch": 0.35289756579760284, "percentage": 17.64, "elapsed_time": "2:34:03", "remaining_time": "11:59:23"} +{"current_steps": 715, "total_steps": 4048, "loss": 0.37720543146133423, "lr": 1.914098490295532e-05, "epoch": 0.35339182009143705, "percentage": 17.66, "elapsed_time": "2:34:16", "remaining_time": "11:59:10"} +{"current_steps": 716, "total_steps": 4048, "loss": 0.298441082239151, "lr": 1.9137668740512195e-05, "epoch": 0.3538860743852712, "percentage": 17.69, "elapsed_time": "2:34:30", "remaining_time": "11:58:59"} +{"current_steps": 717, "total_steps": 4048, "loss": 0.3867550194263458, "lr": 1.913434647789197e-05, "epoch": 0.3543803286791054, "percentage": 17.71, "elapsed_time": "2:34:43", "remaining_time": "11:58:46"} +{"current_steps": 718, "total_steps": 4048, "loss": 0.37111300230026245, "lr": 1.913101811731256e-05, "epoch": 0.35487458297293956, "percentage": 17.74, "elapsed_time": "2:34:56", "remaining_time": "11:58:35"} +{"current_steps": 719, "total_steps": 4048, "loss": 0.3922812342643738, "lr": 1.9127683660995916e-05, "epoch": 0.35536883726677376, "percentage": 17.76, "elapsed_time": "2:35:09", "remaining_time": "11:58:21"} +{"current_steps": 720, "total_steps": 4048, "loss": 0.3878915309906006, "lr": 1.9124343111168077e-05, "epoch": 0.3558630915606079, "percentage": 17.79, "elapsed_time": "2:35:22", "remaining_time": "11:58:09"} +{"current_steps": 721, "total_steps": 4048, "loss": 0.34974879026412964, "lr": 1.9120996470059153e-05, "epoch": 0.3563573458544421, "percentage": 17.81, "elapsed_time": "2:35:35", "remaining_time": "11:57:55"} +{"current_steps": 722, "total_steps": 4048, "loss": 0.38341426849365234, "lr": 1.9117643739903306e-05, "epoch": 0.35685160014827627, "percentage": 17.84, "elapsed_time": "2:35:47", "remaining_time": "11:57:42"} +{"current_steps": 723, "total_steps": 4048, "loss": 0.32610252499580383, "lr": 1.9114284922938772e-05, "epoch": 0.3573458544421105, "percentage": 
17.86, "elapsed_time": "2:36:01", "remaining_time": "11:57:31"} +{"current_steps": 724, "total_steps": 4048, "loss": 0.37203550338745117, "lr": 1.9110920021407855e-05, "epoch": 0.35784010873594463, "percentage": 17.89, "elapsed_time": "2:36:14", "remaining_time": "11:57:17"} +{"current_steps": 725, "total_steps": 4048, "loss": 0.2954786419868469, "lr": 1.9107549037556906e-05, "epoch": 0.35833436302977884, "percentage": 17.91, "elapsed_time": "2:36:27", "remaining_time": "11:57:06"} +{"current_steps": 726, "total_steps": 4048, "loss": 0.33074450492858887, "lr": 1.9104171973636353e-05, "epoch": 0.358828617323613, "percentage": 17.93, "elapsed_time": "2:36:39", "remaining_time": "11:56:50"} +{"current_steps": 727, "total_steps": 4048, "loss": 0.350687712430954, "lr": 1.9100788831900676e-05, "epoch": 0.3593228716174472, "percentage": 17.96, "elapsed_time": "2:36:52", "remaining_time": "11:56:36"} +{"current_steps": 728, "total_steps": 4048, "loss": 0.3635619878768921, "lr": 1.9097399614608406e-05, "epoch": 0.35981712591128134, "percentage": 17.98, "elapsed_time": "2:37:05", "remaining_time": "11:56:22"} +{"current_steps": 729, "total_steps": 4048, "loss": 0.36409544944763184, "lr": 1.909400432402214e-05, "epoch": 0.36031138020511555, "percentage": 18.01, "elapsed_time": "2:37:17", "remaining_time": "11:56:08"} +{"current_steps": 730, "total_steps": 4048, "loss": 0.4109501540660858, "lr": 1.9090602962408523e-05, "epoch": 0.3608056344989497, "percentage": 18.03, "elapsed_time": "2:37:30", "remaining_time": "11:55:53"} +{"current_steps": 731, "total_steps": 4048, "loss": 0.337943971157074, "lr": 1.908719553203826e-05, "epoch": 0.3612998887927839, "percentage": 18.06, "elapsed_time": "2:37:43", "remaining_time": "11:55:42"} +{"current_steps": 732, "total_steps": 4048, "loss": 0.36411553621292114, "lr": 1.9083782035186097e-05, "epoch": 0.36179414308661806, "percentage": 18.08, "elapsed_time": "2:37:56", "remaining_time": "11:55:28"} +{"current_steps": 733, "total_steps": 
4048, "loss": 0.3513786494731903, "lr": 1.908036247413084e-05, "epoch": 0.36228839738045227, "percentage": 18.11, "elapsed_time": "2:38:09", "remaining_time": "11:55:16"} +{"current_steps": 734, "total_steps": 4048, "loss": 0.4017047584056854, "lr": 1.907693685115534e-05, "epoch": 0.3627826516742864, "percentage": 18.13, "elapsed_time": "2:38:21", "remaining_time": "11:54:59"} +{"current_steps": 735, "total_steps": 4048, "loss": 0.3780835270881653, "lr": 1.907350516854649e-05, "epoch": 0.3632769059681206, "percentage": 18.16, "elapsed_time": "2:38:34", "remaining_time": "11:54:44"} +{"current_steps": 736, "total_steps": 4048, "loss": 0.35562777519226074, "lr": 1.9070067428595234e-05, "epoch": 0.3637711602619548, "percentage": 18.18, "elapsed_time": "2:38:46", "remaining_time": "11:54:27"} +{"current_steps": 737, "total_steps": 4048, "loss": 0.34880492091178894, "lr": 1.9066623633596556e-05, "epoch": 0.364265414555789, "percentage": 18.21, "elapsed_time": "2:38:58", "remaining_time": "11:54:12"} +{"current_steps": 738, "total_steps": 4048, "loss": 0.3798677921295166, "lr": 1.9063173785849488e-05, "epoch": 0.36475966884962313, "percentage": 18.23, "elapsed_time": "2:39:11", "remaining_time": "11:53:57"} +{"current_steps": 739, "total_steps": 4048, "loss": 0.371119886636734, "lr": 1.9059717887657098e-05, "epoch": 0.3652539231434573, "percentage": 18.26, "elapsed_time": "2:39:24", "remaining_time": "11:53:44"} +{"current_steps": 740, "total_steps": 4048, "loss": 0.3845891058444977, "lr": 1.9056255941326497e-05, "epoch": 0.3657481774372915, "percentage": 18.28, "elapsed_time": "2:39:36", "remaining_time": "11:53:30"} +{"current_steps": 741, "total_steps": 4048, "loss": 0.34627166390419006, "lr": 1.9052787949168823e-05, "epoch": 0.36624243173112564, "percentage": 18.31, "elapsed_time": "2:39:49", "remaining_time": "11:53:14"} +{"current_steps": 742, "total_steps": 4048, "loss": 0.3872081935405731, "lr": 1.9049313913499266e-05, "epoch": 0.36673668602495985, "percentage": 
18.33, "elapsed_time": "2:40:02", "remaining_time": "11:53:02"} +{"current_steps": 743, "total_steps": 4048, "loss": 0.40446269512176514, "lr": 1.9045833836637038e-05, "epoch": 0.367230940318794, "percentage": 18.35, "elapsed_time": "2:40:14", "remaining_time": "11:52:46"} +{"current_steps": 744, "total_steps": 4048, "loss": 0.3421085476875305, "lr": 1.904234772090539e-05, "epoch": 0.3677251946126282, "percentage": 18.38, "elapsed_time": "2:40:27", "remaining_time": "11:52:34"} +{"current_steps": 745, "total_steps": 4048, "loss": 0.3626730442047119, "lr": 1.90388555686316e-05, "epoch": 0.36821944890646235, "percentage": 18.4, "elapsed_time": "2:40:40", "remaining_time": "11:52:20"} +{"current_steps": 746, "total_steps": 4048, "loss": 0.338506281375885, "lr": 1.9035357382146984e-05, "epoch": 0.36871370320029656, "percentage": 18.43, "elapsed_time": "2:40:53", "remaining_time": "11:52:10"} +{"current_steps": 747, "total_steps": 4048, "loss": 0.3709959089756012, "lr": 1.903185316378688e-05, "epoch": 0.3692079574941307, "percentage": 18.45, "elapsed_time": "2:41:06", "remaining_time": "11:51:57"} +{"current_steps": 748, "total_steps": 4048, "loss": 0.3804059624671936, "lr": 1.9028342915890655e-05, "epoch": 0.3697022117879649, "percentage": 18.48, "elapsed_time": "2:41:19", "remaining_time": "11:51:45"} +{"current_steps": 749, "total_steps": 4048, "loss": 0.3416539132595062, "lr": 1.9024826640801694e-05, "epoch": 0.37019646608179907, "percentage": 18.5, "elapsed_time": "2:41:32", "remaining_time": "11:51:31"} +{"current_steps": 750, "total_steps": 4048, "loss": 0.3642072081565857, "lr": 1.9021304340867418e-05, "epoch": 0.3706907203756333, "percentage": 18.53, "elapsed_time": "2:41:45", "remaining_time": "11:51:20"} +{"current_steps": 751, "total_steps": 4048, "loss": 0.35957199335098267, "lr": 1.9017776018439267e-05, "epoch": 0.3711849746694674, "percentage": 18.55, "elapsed_time": "2:41:58", "remaining_time": "11:51:06"} +{"current_steps": 752, "total_steps": 4048, 
"loss": 0.38497287034988403, "lr": 1.9014241675872692e-05, "epoch": 0.37167922896330163, "percentage": 18.58, "elapsed_time": "2:42:11", "remaining_time": "11:50:53"} +{"current_steps": 753, "total_steps": 4048, "loss": 0.40713614225387573, "lr": 1.9010701315527173e-05, "epoch": 0.3721734832571358, "percentage": 18.6, "elapsed_time": "2:42:24", "remaining_time": "11:50:41"} +{"current_steps": 754, "total_steps": 4048, "loss": 0.35115551948547363, "lr": 1.9007154939766196e-05, "epoch": 0.37266773755097, "percentage": 18.63, "elapsed_time": "2:42:37", "remaining_time": "11:50:28"} +{"current_steps": 755, "total_steps": 4048, "loss": 0.3478096127510071, "lr": 1.9003602550957284e-05, "epoch": 0.37316199184480414, "percentage": 18.65, "elapsed_time": "2:42:50", "remaining_time": "11:50:16"} +{"current_steps": 756, "total_steps": 4048, "loss": 0.36460641026496887, "lr": 1.9000044151471956e-05, "epoch": 0.37365624613863835, "percentage": 18.68, "elapsed_time": "2:43:03", "remaining_time": "11:50:00"} +{"current_steps": 757, "total_steps": 4048, "loss": 0.38015758991241455, "lr": 1.8996479743685745e-05, "epoch": 0.3741505004324725, "percentage": 18.7, "elapsed_time": "2:43:16", "remaining_time": "11:49:48"} +{"current_steps": 758, "total_steps": 4048, "loss": 0.35270214080810547, "lr": 1.8992909329978202e-05, "epoch": 0.3746447547263067, "percentage": 18.73, "elapsed_time": "2:43:28", "remaining_time": "11:49:33"} +{"current_steps": 759, "total_steps": 4048, "loss": 0.3875473439693451, "lr": 1.8989332912732884e-05, "epoch": 0.37513900902014086, "percentage": 18.75, "elapsed_time": "2:43:41", "remaining_time": "11:49:21"} +{"current_steps": 760, "total_steps": 4048, "loss": 0.3281819820404053, "lr": 1.8985750494337353e-05, "epoch": 0.37563326331397506, "percentage": 18.77, "elapsed_time": "2:43:54", "remaining_time": "11:49:08"} +{"current_steps": 761, "total_steps": 4048, "loss": 0.4081311821937561, "lr": 1.8982162077183182e-05, "epoch": 0.3761275176078092, "percentage": 
18.8, "elapsed_time": "2:44:08", "remaining_time": "11:48:56"} +{"current_steps": 762, "total_steps": 4048, "loss": 0.3546852469444275, "lr": 1.897856766366595e-05, "epoch": 0.3766217719016434, "percentage": 18.82, "elapsed_time": "2:44:20", "remaining_time": "11:48:42"} +{"current_steps": 763, "total_steps": 4048, "loss": 0.3177235424518585, "lr": 1.8974967256185234e-05, "epoch": 0.37711602619547757, "percentage": 18.85, "elapsed_time": "2:44:33", "remaining_time": "11:48:28"} +{"current_steps": 764, "total_steps": 4048, "loss": 0.3739625811576843, "lr": 1.8971360857144616e-05, "epoch": 0.3776102804893118, "percentage": 18.87, "elapsed_time": "2:44:46", "remaining_time": "11:48:16"} +{"current_steps": 765, "total_steps": 4048, "loss": 0.32039010524749756, "lr": 1.8967748468951673e-05, "epoch": 0.37810453478314593, "percentage": 18.9, "elapsed_time": "2:44:59", "remaining_time": "11:48:02"} +{"current_steps": 766, "total_steps": 4048, "loss": 0.3237234354019165, "lr": 1.8964130094017986e-05, "epoch": 0.3785987890769801, "percentage": 18.92, "elapsed_time": "2:45:12", "remaining_time": "11:47:50"} +{"current_steps": 767, "total_steps": 4048, "loss": 0.33864307403564453, "lr": 1.896050573475913e-05, "epoch": 0.3790930433708143, "percentage": 18.95, "elapsed_time": "2:45:24", "remaining_time": "11:47:36"} +{"current_steps": 768, "total_steps": 4048, "loss": 0.40412086248397827, "lr": 1.8956875393594675e-05, "epoch": 0.37958729766464844, "percentage": 18.97, "elapsed_time": "2:45:38", "remaining_time": "11:47:24"} +{"current_steps": 769, "total_steps": 4048, "loss": 0.37689530849456787, "lr": 1.8953239072948185e-05, "epoch": 0.38008155195848264, "percentage": 19.0, "elapsed_time": "2:45:50", "remaining_time": "11:47:09"} +{"current_steps": 770, "total_steps": 4048, "loss": 0.3632664680480957, "lr": 1.8949596775247215e-05, "epoch": 0.3805758062523168, "percentage": 19.02, "elapsed_time": "2:46:04", "remaining_time": "11:46:58"} +{"current_steps": 771, "total_steps": 
4048, "loss": 0.384027361869812, "lr": 1.8945948502923314e-05, "epoch": 0.381070060546151, "percentage": 19.05, "elapsed_time": "2:46:16", "remaining_time": "11:46:44"} +{"current_steps": 772, "total_steps": 4048, "loss": 0.37623292207717896, "lr": 1.8942294258412012e-05, "epoch": 0.38156431483998515, "percentage": 19.07, "elapsed_time": "2:46:29", "remaining_time": "11:46:32"} +{"current_steps": 773, "total_steps": 4048, "loss": 0.3449557423591614, "lr": 1.8938634044152837e-05, "epoch": 0.38205856913381936, "percentage": 19.1, "elapsed_time": "2:46:42", "remaining_time": "11:46:17"} +{"current_steps": 774, "total_steps": 4048, "loss": 0.37977170944213867, "lr": 1.8934967862589287e-05, "epoch": 0.3825528234276535, "percentage": 19.12, "elapsed_time": "2:46:55", "remaining_time": "11:46:05"} +{"current_steps": 775, "total_steps": 4048, "loss": 0.3535463809967041, "lr": 1.893129571616886e-05, "epoch": 0.3830470777214877, "percentage": 19.15, "elapsed_time": "2:47:08", "remaining_time": "11:45:50"} +{"current_steps": 776, "total_steps": 4048, "loss": 0.3107556104660034, "lr": 1.8927617607343024e-05, "epoch": 0.38354133201532187, "percentage": 19.17, "elapsed_time": "2:47:21", "remaining_time": "11:45:38"} +{"current_steps": 777, "total_steps": 4048, "loss": 0.33028605580329895, "lr": 1.8923933538567238e-05, "epoch": 0.3840355863091561, "percentage": 19.19, "elapsed_time": "2:47:33", "remaining_time": "11:45:25"} +{"current_steps": 778, "total_steps": 4048, "loss": 0.35947421193122864, "lr": 1.8920243512300925e-05, "epoch": 0.3845298406029902, "percentage": 19.22, "elapsed_time": "2:47:47", "remaining_time": "11:45:15"} +{"current_steps": 779, "total_steps": 4048, "loss": 0.36262935400009155, "lr": 1.89165475310075e-05, "epoch": 0.38502409489682443, "percentage": 19.24, "elapsed_time": "2:48:00", "remaining_time": "11:45:03"} +{"current_steps": 780, "total_steps": 4048, "loss": 0.3441828489303589, "lr": 1.8912845597154344e-05, "epoch": 0.3855183491906586, "percentage": 
19.27, "elapsed_time": "2:48:13", "remaining_time": "11:44:48"} +{"current_steps": 781, "total_steps": 4048, "loss": 0.3748928904533386, "lr": 1.8909137713212813e-05, "epoch": 0.3860126034844928, "percentage": 19.29, "elapsed_time": "2:48:25", "remaining_time": "11:44:33"} +{"current_steps": 782, "total_steps": 4048, "loss": 0.3571966588497162, "lr": 1.8905423881658248e-05, "epoch": 0.38650685777832694, "percentage": 19.32, "elapsed_time": "2:48:38", "remaining_time": "11:44:18"} +{"current_steps": 783, "total_steps": 4048, "loss": 0.3937920331954956, "lr": 1.8901704104969937e-05, "epoch": 0.38700111207216115, "percentage": 19.34, "elapsed_time": "2:48:51", "remaining_time": "11:44:05"} +{"current_steps": 784, "total_steps": 4048, "loss": 0.3641708493232727, "lr": 1.8897978385631157e-05, "epoch": 0.3874953663659953, "percentage": 19.37, "elapsed_time": "2:49:03", "remaining_time": "11:43:51"} +{"current_steps": 785, "total_steps": 4048, "loss": 0.33510833978652954, "lr": 1.8894246726129143e-05, "epoch": 0.3879896206598295, "percentage": 19.39, "elapsed_time": "2:49:16", "remaining_time": "11:43:36"} +{"current_steps": 786, "total_steps": 4048, "loss": 0.3553236722946167, "lr": 1.88905091289551e-05, "epoch": 0.38848387495366365, "percentage": 19.42, "elapsed_time": "2:49:28", "remaining_time": "11:43:21"} +{"current_steps": 787, "total_steps": 4048, "loss": 0.3802195191383362, "lr": 1.8886765596604188e-05, "epoch": 0.38897812924749786, "percentage": 19.44, "elapsed_time": "2:49:41", "remaining_time": "11:43:07"} +{"current_steps": 788, "total_steps": 4048, "loss": 0.3672805726528168, "lr": 1.8883016131575546e-05, "epoch": 0.389472383541332, "percentage": 19.47, "elapsed_time": "2:49:53", "remaining_time": "11:42:51"} +{"current_steps": 789, "total_steps": 4048, "loss": 0.35715609788894653, "lr": 1.887926073637225e-05, "epoch": 0.3899666378351662, "percentage": 19.49, "elapsed_time": "2:50:06", "remaining_time": "11:42:38"} +{"current_steps": 790, "total_steps": 
4048, "loss": 0.3800659775733948, "lr": 1.8875499413501362e-05, "epoch": 0.39046089212900037, "percentage": 19.52, "elapsed_time": "2:50:18", "remaining_time": "11:42:23"} +{"current_steps": 791, "total_steps": 4048, "loss": 0.36886462569236755, "lr": 1.8871732165473878e-05, "epoch": 0.3909551464228345, "percentage": 19.54, "elapsed_time": "2:50:32", "remaining_time": "11:42:11"} +{"current_steps": 792, "total_steps": 4048, "loss": 0.37373536825180054, "lr": 1.886795899480476e-05, "epoch": 0.3914494007166687, "percentage": 19.57, "elapsed_time": "2:50:44", "remaining_time": "11:41:58"} +{"current_steps": 793, "total_steps": 4048, "loss": 0.4016551375389099, "lr": 1.8864179904012932e-05, "epoch": 0.3919436550105029, "percentage": 19.59, "elapsed_time": "2:50:56", "remaining_time": "11:41:40"} +{"current_steps": 794, "total_steps": 4048, "loss": 0.35107535123825073, "lr": 1.886039489562125e-05, "epoch": 0.3924379093043371, "percentage": 19.61, "elapsed_time": "2:51:09", "remaining_time": "11:41:24"} +{"current_steps": 795, "total_steps": 4048, "loss": 0.36280331015586853, "lr": 1.8856603972156532e-05, "epoch": 0.39293216359817124, "percentage": 19.64, "elapsed_time": "2:51:20", "remaining_time": "11:41:07"} +{"current_steps": 796, "total_steps": 4048, "loss": 0.3417884111404419, "lr": 1.885280713614955e-05, "epoch": 0.39342641789200544, "percentage": 19.66, "elapsed_time": "2:51:33", "remaining_time": "11:40:52"} +{"current_steps": 797, "total_steps": 4048, "loss": 0.3257544934749603, "lr": 1.8849004390135017e-05, "epoch": 0.3939206721858396, "percentage": 19.69, "elapsed_time": "2:51:45", "remaining_time": "11:40:35"} +{"current_steps": 798, "total_steps": 4048, "loss": 0.3694860339164734, "lr": 1.8845195736651588e-05, "epoch": 0.3944149264796738, "percentage": 19.71, "elapsed_time": "2:51:57", "remaining_time": "11:40:21"} +{"current_steps": 799, "total_steps": 4048, "loss": 0.37279266119003296, "lr": 1.8841381178241865e-05, "epoch": 0.39490918077350795, 
"percentage": 19.74, "elapsed_time": "2:52:09", "remaining_time": "11:40:04"} +{"current_steps": 800, "total_steps": 4048, "loss": 0.38758352398872375, "lr": 1.88375607174524e-05, "epoch": 0.39540343506734216, "percentage": 19.76, "elapsed_time": "2:52:22", "remaining_time": "11:39:49"} +{"current_steps": 801, "total_steps": 4048, "loss": 0.34098950028419495, "lr": 1.883373435683367e-05, "epoch": 0.3958976893611763, "percentage": 19.79, "elapsed_time": "2:52:39", "remaining_time": "11:39:55"} +{"current_steps": 802, "total_steps": 4048, "loss": 0.3278653621673584, "lr": 1.8829902098940105e-05, "epoch": 0.3963919436550105, "percentage": 19.81, "elapsed_time": "2:52:52", "remaining_time": "11:39:42"} +{"current_steps": 803, "total_steps": 4048, "loss": 0.3673133850097656, "lr": 1.8826063946330065e-05, "epoch": 0.39688619794884467, "percentage": 19.84, "elapsed_time": "2:53:05", "remaining_time": "11:39:28"} +{"current_steps": 804, "total_steps": 4048, "loss": 0.37917453050613403, "lr": 1.882221990156584e-05, "epoch": 0.39738045224267887, "percentage": 19.86, "elapsed_time": "2:53:18", "remaining_time": "11:39:16"} +{"current_steps": 805, "total_steps": 4048, "loss": 0.33986327052116394, "lr": 1.8818369967213662e-05, "epoch": 0.397874706536513, "percentage": 19.89, "elapsed_time": "2:53:31", "remaining_time": "11:39:02"} +{"current_steps": 806, "total_steps": 4048, "loss": 0.34402647614479065, "lr": 1.8814514145843694e-05, "epoch": 0.39836896083034723, "percentage": 19.91, "elapsed_time": "2:53:44", "remaining_time": "11:38:49"} +{"current_steps": 807, "total_steps": 4048, "loss": 0.32781803607940674, "lr": 1.8810652440030026e-05, "epoch": 0.3988632151241814, "percentage": 19.94, "elapsed_time": "2:53:56", "remaining_time": "11:38:34"} +{"current_steps": 808, "total_steps": 4048, "loss": 0.35807961225509644, "lr": 1.8806784852350678e-05, "epoch": 0.3993574694180156, "percentage": 19.96, "elapsed_time": "2:54:09", "remaining_time": "11:38:19"} +{"current_steps": 809, 
"total_steps": 4048, "loss": 0.33577096462249756, "lr": 1.8802911385387596e-05, "epoch": 0.39985172371184974, "percentage": 19.99, "elapsed_time": "2:54:22", "remaining_time": "11:38:07"} +{"current_steps": 810, "total_steps": 4048, "loss": 0.37786391377449036, "lr": 1.8799032041726654e-05, "epoch": 0.40034597800568394, "percentage": 20.01, "elapsed_time": "2:54:34", "remaining_time": "11:37:52"} +{"current_steps": 811, "total_steps": 4048, "loss": 0.3237725496292114, "lr": 1.879514682395764e-05, "epoch": 0.4008402322995181, "percentage": 20.03, "elapsed_time": "2:54:47", "remaining_time": "11:37:40"} +{"current_steps": 812, "total_steps": 4048, "loss": 0.29552844166755676, "lr": 1.8791255734674275e-05, "epoch": 0.4013344865933523, "percentage": 20.06, "elapsed_time": "2:55:00", "remaining_time": "11:37:26"} +{"current_steps": 813, "total_steps": 4048, "loss": 0.40317612886428833, "lr": 1.8787358776474192e-05, "epoch": 0.40182874088718645, "percentage": 20.08, "elapsed_time": "2:55:13", "remaining_time": "11:37:13"} +{"current_steps": 814, "total_steps": 4048, "loss": 0.33383694291114807, "lr": 1.8783455951958948e-05, "epoch": 0.40232299518102066, "percentage": 20.11, "elapsed_time": "2:55:25", "remaining_time": "11:36:58"} +{"current_steps": 815, "total_steps": 4048, "loss": 0.35020262002944946, "lr": 1.8779547263734012e-05, "epoch": 0.4028172494748548, "percentage": 20.13, "elapsed_time": "2:55:38", "remaining_time": "11:36:45"} +{"current_steps": 816, "total_steps": 4048, "loss": 0.3742774724960327, "lr": 1.8775632714408765e-05, "epoch": 0.403311503768689, "percentage": 20.16, "elapsed_time": "2:55:51", "remaining_time": "11:36:32"} +{"current_steps": 817, "total_steps": 4048, "loss": 0.35037580132484436, "lr": 1.8771712306596506e-05, "epoch": 0.40380575806252317, "percentage": 20.18, "elapsed_time": "2:56:05", "remaining_time": "11:36:21"} +{"current_steps": 818, "total_steps": 4048, "loss": 0.3416820168495178, "lr": 1.8767786042914445e-05, "epoch": 
0.4043000123563573, "percentage": 20.21, "elapsed_time": "2:56:17", "remaining_time": "11:36:07"} +{"current_steps": 819, "total_steps": 4048, "loss": 0.33287927508354187, "lr": 1.8763853925983695e-05, "epoch": 0.4047942666501915, "percentage": 20.23, "elapsed_time": "2:56:30", "remaining_time": "11:35:54"} +{"current_steps": 820, "total_steps": 4048, "loss": 0.3493141531944275, "lr": 1.875991595842929e-05, "epoch": 0.4052885209440257, "percentage": 20.26, "elapsed_time": "2:56:43", "remaining_time": "11:35:43"} +{"current_steps": 821, "total_steps": 4048, "loss": 0.4184780418872833, "lr": 1.875597214288015e-05, "epoch": 0.4057827752378599, "percentage": 20.28, "elapsed_time": "2:56:56", "remaining_time": "11:35:29"} +{"current_steps": 822, "total_steps": 4048, "loss": 0.33189794421195984, "lr": 1.8752022481969116e-05, "epoch": 0.40627702953169403, "percentage": 20.31, "elapsed_time": "2:57:10", "remaining_time": "11:35:18"} +{"current_steps": 823, "total_steps": 4048, "loss": 0.35339856147766113, "lr": 1.8748066978332925e-05, "epoch": 0.40677128382552824, "percentage": 20.33, "elapsed_time": "2:57:22", "remaining_time": "11:35:05"} +{"current_steps": 824, "total_steps": 4048, "loss": 0.3766328692436218, "lr": 1.874410563461221e-05, "epoch": 0.4072655381193624, "percentage": 20.36, "elapsed_time": "2:57:36", "remaining_time": "11:34:54"} +{"current_steps": 825, "total_steps": 4048, "loss": 0.32575076818466187, "lr": 1.874013845345152e-05, "epoch": 0.4077597924131966, "percentage": 20.38, "elapsed_time": "2:57:49", "remaining_time": "11:34:41"} +{"current_steps": 826, "total_steps": 4048, "loss": 0.3417864441871643, "lr": 1.8736165437499273e-05, "epoch": 0.40825404670703075, "percentage": 20.41, "elapsed_time": "2:58:02", "remaining_time": "11:34:30"} +{"current_steps": 827, "total_steps": 4048, "loss": 0.3636544942855835, "lr": 1.8732186589407807e-05, "epoch": 0.40874830100086496, "percentage": 20.43, "elapsed_time": "2:58:15", "remaining_time": "11:34:16"} 
+{"current_steps": 828, "total_steps": 4048, "loss": 0.38730406761169434, "lr": 1.872820191183334e-05, "epoch": 0.4092425552946991, "percentage": 20.45, "elapsed_time": "2:58:28", "remaining_time": "11:34:05"} +{"current_steps": 829, "total_steps": 4048, "loss": 0.3593043088912964, "lr": 1.872421140743599e-05, "epoch": 0.4097368095885333, "percentage": 20.48, "elapsed_time": "2:58:41", "remaining_time": "11:33:52"} +{"current_steps": 830, "total_steps": 4048, "loss": 0.39092978835105896, "lr": 1.872021507887976e-05, "epoch": 0.41023106388236746, "percentage": 20.5, "elapsed_time": "2:58:55", "remaining_time": "11:33:40"} +{"current_steps": 831, "total_steps": 4048, "loss": 0.3745616674423218, "lr": 1.8716212928832537e-05, "epoch": 0.41072531817620167, "percentage": 20.53, "elapsed_time": "2:59:08", "remaining_time": "11:33:29"} +{"current_steps": 832, "total_steps": 4048, "loss": 0.39571845531463623, "lr": 1.87122049599661e-05, "epoch": 0.4112195724700358, "percentage": 20.55, "elapsed_time": "2:59:20", "remaining_time": "11:33:14"} +{"current_steps": 833, "total_steps": 4048, "loss": 0.35459476709365845, "lr": 1.8708191174956116e-05, "epoch": 0.41171382676387, "percentage": 20.58, "elapsed_time": "2:59:33", "remaining_time": "11:33:02"} +{"current_steps": 834, "total_steps": 4048, "loss": 0.38937896490097046, "lr": 1.870417157648213e-05, "epoch": 0.4122080810577042, "percentage": 20.6, "elapsed_time": "2:59:46", "remaining_time": "11:32:48"} +{"current_steps": 835, "total_steps": 4048, "loss": 0.33595120906829834, "lr": 1.8700146167227563e-05, "epoch": 0.4127023353515384, "percentage": 20.63, "elapsed_time": "2:59:59", "remaining_time": "11:32:36"} +{"current_steps": 836, "total_steps": 4048, "loss": 0.332889199256897, "lr": 1.869611494987973e-05, "epoch": 0.41319658964537254, "percentage": 20.65, "elapsed_time": "3:00:12", "remaining_time": "11:32:22"} +{"current_steps": 837, "total_steps": 4048, "loss": 0.333438515663147, "lr": 1.8692077927129803e-05, "epoch": 
0.41369084393920674, "percentage": 20.68, "elapsed_time": "3:00:25", "remaining_time": "11:32:11"} +{"current_steps": 838, "total_steps": 4048, "loss": 0.30645743012428284, "lr": 1.868803510167285e-05, "epoch": 0.4141850982330409, "percentage": 20.7, "elapsed_time": "3:00:38", "remaining_time": "11:31:57"} +{"current_steps": 839, "total_steps": 4048, "loss": 0.3333967924118042, "lr": 1.86839864762078e-05, "epoch": 0.4146793525268751, "percentage": 20.73, "elapsed_time": "3:00:51", "remaining_time": "11:31:46"} +{"current_steps": 840, "total_steps": 4048, "loss": 0.36230576038360596, "lr": 1.867993205343746e-05, "epoch": 0.41517360682070925, "percentage": 20.75, "elapsed_time": "3:01:04", "remaining_time": "11:31:32"} +{"current_steps": 841, "total_steps": 4048, "loss": 0.34191709756851196, "lr": 1.8675871836068498e-05, "epoch": 0.41566786111454346, "percentage": 20.78, "elapsed_time": "3:01:17", "remaining_time": "11:31:20"} +{"current_steps": 842, "total_steps": 4048, "loss": 0.3115188479423523, "lr": 1.8671805826811462e-05, "epoch": 0.4161621154083776, "percentage": 20.8, "elapsed_time": "3:01:30", "remaining_time": "11:31:05"} +{"current_steps": 843, "total_steps": 4048, "loss": 0.3725768029689789, "lr": 1.866773402838076e-05, "epoch": 0.4166563697022118, "percentage": 20.83, "elapsed_time": "3:01:43", "remaining_time": "11:30:53"} +{"current_steps": 844, "total_steps": 4048, "loss": 0.376983642578125, "lr": 1.8663656443494673e-05, "epoch": 0.41715062399604597, "percentage": 20.85, "elapsed_time": "3:01:56", "remaining_time": "11:30:39"} +{"current_steps": 845, "total_steps": 4048, "loss": 0.31490784883499146, "lr": 1.8659573074875327e-05, "epoch": 0.4176448782898801, "percentage": 20.87, "elapsed_time": "3:02:09", "remaining_time": "11:30:27"} +{"current_steps": 846, "total_steps": 4048, "loss": 0.3533504605293274, "lr": 1.8655483925248727e-05, "epoch": 0.4181391325837143, "percentage": 20.9, "elapsed_time": "3:02:21", "remaining_time": "11:30:13"} 
+{"current_steps": 847, "total_steps": 4048, "loss": 0.3282274305820465, "lr": 1.8651388997344734e-05, "epoch": 0.4186333868775485, "percentage": 20.92, "elapsed_time": "3:02:34", "remaining_time": "11:29:59"} +{"current_steps": 848, "total_steps": 4048, "loss": 0.32892414927482605, "lr": 1.8647288293897055e-05, "epoch": 0.4191276411713827, "percentage": 20.95, "elapsed_time": "3:02:47", "remaining_time": "11:29:48"} +{"current_steps": 849, "total_steps": 4048, "loss": 0.40414246916770935, "lr": 1.864318181764327e-05, "epoch": 0.41962189546521683, "percentage": 20.97, "elapsed_time": "3:03:00", "remaining_time": "11:29:35"} +{"current_steps": 850, "total_steps": 4048, "loss": 0.30335378646850586, "lr": 1.8639069571324798e-05, "epoch": 0.42011614975905104, "percentage": 21.0, "elapsed_time": "3:03:13", "remaining_time": "11:29:22"} +{"current_steps": 851, "total_steps": 4048, "loss": 0.311710000038147, "lr": 1.863495155768692e-05, "epoch": 0.4206104040528852, "percentage": 21.02, "elapsed_time": "3:03:26", "remaining_time": "11:29:07"} +{"current_steps": 852, "total_steps": 4048, "loss": 0.37345218658447266, "lr": 1.8630827779478755e-05, "epoch": 0.4211046583467194, "percentage": 21.05, "elapsed_time": "3:03:39", "remaining_time": "11:28:55"} +{"current_steps": 853, "total_steps": 4048, "loss": 0.37286317348480225, "lr": 1.8626698239453287e-05, "epoch": 0.42159891264055355, "percentage": 21.07, "elapsed_time": "3:03:51", "remaining_time": "11:28:41"} +{"current_steps": 854, "total_steps": 4048, "loss": 0.3706691861152649, "lr": 1.8622562940367335e-05, "epoch": 0.42209316693438775, "percentage": 21.1, "elapsed_time": "3:04:05", "remaining_time": "11:28:29"} +{"current_steps": 855, "total_steps": 4048, "loss": 0.30183354020118713, "lr": 1.8618421884981567e-05, "epoch": 0.4225874212282219, "percentage": 21.12, "elapsed_time": "3:04:17", "remaining_time": "11:28:16"} +{"current_steps": 856, "total_steps": 4048, "loss": 0.32329827547073364, "lr": 1.8614275076060486e-05, 
"epoch": 0.4230816755220561, "percentage": 21.15, "elapsed_time": "3:04:31", "remaining_time": "11:28:04"} +{"current_steps": 857, "total_steps": 4048, "loss": 0.39380010962486267, "lr": 1.861012251637245e-05, "epoch": 0.42357592981589026, "percentage": 21.17, "elapsed_time": "3:04:43", "remaining_time": "11:27:50"} +{"current_steps": 858, "total_steps": 4048, "loss": 0.41745316982269287, "lr": 1.8605964208689646e-05, "epoch": 0.42407018410972447, "percentage": 21.2, "elapsed_time": "3:04:56", "remaining_time": "11:27:37"} +{"current_steps": 859, "total_steps": 4048, "loss": 0.36751389503479004, "lr": 1.86018001557881e-05, "epoch": 0.4245644384035586, "percentage": 21.22, "elapsed_time": "3:05:09", "remaining_time": "11:27:22"} +{"current_steps": 860, "total_steps": 4048, "loss": 0.36876100301742554, "lr": 1.8597630360447673e-05, "epoch": 0.4250586926973928, "percentage": 21.25, "elapsed_time": "3:05:21", "remaining_time": "11:27:07"} +{"current_steps": 861, "total_steps": 4048, "loss": 0.3473365306854248, "lr": 1.8593454825452067e-05, "epoch": 0.425552946991227, "percentage": 21.27, "elapsed_time": "3:05:34", "remaining_time": "11:26:55"} +{"current_steps": 862, "total_steps": 4048, "loss": 0.3429828882217407, "lr": 1.8589273553588802e-05, "epoch": 0.4260472012850612, "percentage": 21.29, "elapsed_time": "3:05:47", "remaining_time": "11:26:40"} +{"current_steps": 863, "total_steps": 4048, "loss": 0.3424219787120819, "lr": 1.8585086547649238e-05, "epoch": 0.42654145557889533, "percentage": 21.32, "elapsed_time": "3:06:00", "remaining_time": "11:26:29"} +{"current_steps": 864, "total_steps": 4048, "loss": 0.32187891006469727, "lr": 1.8580893810428562e-05, "epoch": 0.42703570987272954, "percentage": 21.34, "elapsed_time": "3:06:13", "remaining_time": "11:26:15"} +{"current_steps": 865, "total_steps": 4048, "loss": 0.3116072416305542, "lr": 1.8576695344725785e-05, "epoch": 0.4275299641665637, "percentage": 21.37, "elapsed_time": "3:06:26", "remaining_time": 
"11:26:03"} +{"current_steps": 866, "total_steps": 4048, "loss": 0.32645124197006226, "lr": 1.8572491153343742e-05, "epoch": 0.4280242184603979, "percentage": 21.39, "elapsed_time": "3:06:39", "remaining_time": "11:25:50"} +{"current_steps": 867, "total_steps": 4048, "loss": 0.36861616373062134, "lr": 1.8568281239089088e-05, "epoch": 0.42851847275423205, "percentage": 21.42, "elapsed_time": "3:06:52", "remaining_time": "11:25:37"} +{"current_steps": 868, "total_steps": 4048, "loss": 0.38477885723114014, "lr": 1.8564065604772307e-05, "epoch": 0.42901272704806626, "percentage": 21.44, "elapsed_time": "3:07:05", "remaining_time": "11:25:23"} +{"current_steps": 869, "total_steps": 4048, "loss": 0.352588951587677, "lr": 1.8559844253207694e-05, "epoch": 0.4295069813419004, "percentage": 21.47, "elapsed_time": "3:07:18", "remaining_time": "11:25:12"} +{"current_steps": 870, "total_steps": 4048, "loss": 0.43443864583969116, "lr": 1.8555617187213362e-05, "epoch": 0.43000123563573456, "percentage": 21.49, "elapsed_time": "3:07:30", "remaining_time": "11:24:57"} +{"current_steps": 871, "total_steps": 4048, "loss": 0.37355685234069824, "lr": 1.8551384409611238e-05, "epoch": 0.43049548992956876, "percentage": 21.52, "elapsed_time": "3:07:44", "remaining_time": "11:24:45"} +{"current_steps": 872, "total_steps": 4048, "loss": 0.3529026508331299, "lr": 1.854714592322707e-05, "epoch": 0.4309897442234029, "percentage": 21.54, "elapsed_time": "3:07:56", "remaining_time": "11:24:31"} +{"current_steps": 873, "total_steps": 4048, "loss": 0.3278823494911194, "lr": 1.854290173089041e-05, "epoch": 0.4314839985172371, "percentage": 21.57, "elapsed_time": "3:08:09", "remaining_time": "11:24:18"} +{"current_steps": 874, "total_steps": 4048, "loss": 0.3677588999271393, "lr": 1.8538651835434615e-05, "epoch": 0.4319782528110713, "percentage": 21.59, "elapsed_time": "3:08:22", "remaining_time": "11:24:05"} +{"current_steps": 875, "total_steps": 4048, "loss": 0.34132176637649536, "lr": 
1.8534396239696852e-05, "epoch": 0.4324725071049055, "percentage": 21.62, "elapsed_time": "3:08:35", "remaining_time": "11:23:53"} +{"current_steps": 876, "total_steps": 4048, "loss": 0.3329963684082031, "lr": 1.8530134946518106e-05, "epoch": 0.43296676139873963, "percentage": 21.64, "elapsed_time": "3:08:48", "remaining_time": "11:23:41"} +{"current_steps": 877, "total_steps": 4048, "loss": 0.38435080647468567, "lr": 1.852586795874315e-05, "epoch": 0.43346101569257384, "percentage": 21.67, "elapsed_time": "3:09:01", "remaining_time": "11:23:29"} +{"current_steps": 878, "total_steps": 4048, "loss": 0.3737541735172272, "lr": 1.8521595279220564e-05, "epoch": 0.433955269986408, "percentage": 21.69, "elapsed_time": "3:09:14", "remaining_time": "11:23:15"} +{"current_steps": 879, "total_steps": 4048, "loss": 0.3676382303237915, "lr": 1.851731691080273e-05, "epoch": 0.4344495242802422, "percentage": 21.71, "elapsed_time": "3:09:27", "remaining_time": "11:23:01"} +{"current_steps": 880, "total_steps": 4048, "loss": 0.317960262298584, "lr": 1.8513032856345825e-05, "epoch": 0.43494377857407635, "percentage": 21.74, "elapsed_time": "3:09:40", "remaining_time": "11:22:49"} +{"current_steps": 881, "total_steps": 4048, "loss": 0.38857966661453247, "lr": 1.8508743118709816e-05, "epoch": 0.43543803286791055, "percentage": 21.76, "elapsed_time": "3:09:52", "remaining_time": "11:22:35"} +{"current_steps": 882, "total_steps": 4048, "loss": 0.33234506845474243, "lr": 1.8504447700758482e-05, "epoch": 0.4359322871617447, "percentage": 21.79, "elapsed_time": "3:10:06", "remaining_time": "11:22:23"} +{"current_steps": 883, "total_steps": 4048, "loss": 0.3380611538887024, "lr": 1.8500146605359375e-05, "epoch": 0.4364265414555789, "percentage": 21.81, "elapsed_time": "3:10:18", "remaining_time": "11:22:09"} +{"current_steps": 884, "total_steps": 4048, "loss": 0.36386823654174805, "lr": 1.8495839835383845e-05, "epoch": 0.43692079574941306, "percentage": 21.84, "elapsed_time": "3:10:31", 
"remaining_time": "11:21:57"} +{"current_steps": 885, "total_steps": 4048, "loss": 0.34711897373199463, "lr": 1.849152739370703e-05, "epoch": 0.43741505004324727, "percentage": 21.86, "elapsed_time": "3:10:44", "remaining_time": "11:21:42"} +{"current_steps": 886, "total_steps": 4048, "loss": 0.3861457109451294, "lr": 1.848720928320786e-05, "epoch": 0.4379093043370814, "percentage": 21.89, "elapsed_time": "3:10:57", "remaining_time": "11:21:31"} +{"current_steps": 887, "total_steps": 4048, "loss": 0.3387115001678467, "lr": 1.848288550676904e-05, "epoch": 0.4384035586309156, "percentage": 21.91, "elapsed_time": "3:11:10", "remaining_time": "11:21:17"} +{"current_steps": 888, "total_steps": 4048, "loss": 0.3419748842716217, "lr": 1.847855606727706e-05, "epoch": 0.4388978129247498, "percentage": 21.94, "elapsed_time": "3:11:23", "remaining_time": "11:21:03"} +{"current_steps": 889, "total_steps": 4048, "loss": 0.38184499740600586, "lr": 1.847422096762219e-05, "epoch": 0.439392067218584, "percentage": 21.96, "elapsed_time": "3:11:35", "remaining_time": "11:20:47"} +{"current_steps": 890, "total_steps": 4048, "loss": 0.3845345973968506, "lr": 1.846988021069849e-05, "epoch": 0.43988632151241813, "percentage": 21.99, "elapsed_time": "3:11:47", "remaining_time": "11:20:31"} +{"current_steps": 891, "total_steps": 4048, "loss": 0.31854647397994995, "lr": 1.8465533799403778e-05, "epoch": 0.44038057580625234, "percentage": 22.01, "elapsed_time": "3:11:59", "remaining_time": "11:20:17"} +{"current_steps": 892, "total_steps": 4048, "loss": 0.3940027356147766, "lr": 1.8461181736639658e-05, "epoch": 0.4408748301000865, "percentage": 22.04, "elapsed_time": "3:12:11", "remaining_time": "11:20:01"} +{"current_steps": 893, "total_steps": 4048, "loss": 0.3580612540245056, "lr": 1.8456824025311508e-05, "epoch": 0.4413690843939207, "percentage": 22.06, "elapsed_time": "3:12:24", "remaining_time": "11:19:46"} +{"current_steps": 894, "total_steps": 4048, "loss": 0.3662642240524292, "lr": 
1.8452460668328474e-05, "epoch": 0.44186333868775485, "percentage": 22.08, "elapsed_time": "3:12:36", "remaining_time": "11:19:30"} +{"current_steps": 895, "total_steps": 4048, "loss": 0.29031360149383545, "lr": 1.8448091668603464e-05, "epoch": 0.44235759298158905, "percentage": 22.11, "elapsed_time": "3:12:48", "remaining_time": "11:19:16"} +{"current_steps": 896, "total_steps": 4048, "loss": 0.36141306161880493, "lr": 1.844371702905317e-05, "epoch": 0.4428518472754232, "percentage": 22.13, "elapsed_time": "3:13:00", "remaining_time": "11:19:00"} +{"current_steps": 897, "total_steps": 4048, "loss": 0.35286253690719604, "lr": 1.8439336752598027e-05, "epoch": 0.44334610156925736, "percentage": 22.16, "elapsed_time": "3:13:13", "remaining_time": "11:18:45"} +{"current_steps": 898, "total_steps": 4048, "loss": 0.38967087864875793, "lr": 1.8434950842162256e-05, "epoch": 0.44384035586309156, "percentage": 22.18, "elapsed_time": "3:13:26", "remaining_time": "11:18:31"} +{"current_steps": 899, "total_steps": 4048, "loss": 0.4260423183441162, "lr": 1.8430559300673824e-05, "epoch": 0.4443346101569257, "percentage": 22.21, "elapsed_time": "3:13:39", "remaining_time": "11:18:19"} +{"current_steps": 900, "total_steps": 4048, "loss": 0.35336780548095703, "lr": 1.8426162131064456e-05, "epoch": 0.4448288644507599, "percentage": 22.23, "elapsed_time": "3:13:52", "remaining_time": "11:18:06"} +{"current_steps": 901, "total_steps": 4048, "loss": 0.32953035831451416, "lr": 1.842175933626965e-05, "epoch": 0.44532311874459407, "percentage": 22.26, "elapsed_time": "3:14:11", "remaining_time": "11:18:17"} +{"current_steps": 902, "total_steps": 4048, "loss": 0.3495085537433624, "lr": 1.841735091922864e-05, "epoch": 0.4458173730384283, "percentage": 22.28, "elapsed_time": "3:14:25", "remaining_time": "11:18:06"} +{"current_steps": 903, "total_steps": 4048, "loss": 0.3774382174015045, "lr": 1.8412936882884426e-05, "epoch": 0.44631162733226243, "percentage": 22.31, "elapsed_time": "3:14:38", 
"remaining_time": "11:17:53"} +{"current_steps": 904, "total_steps": 4048, "loss": 0.397183358669281, "lr": 1.8408517230183756e-05, "epoch": 0.44680588162609663, "percentage": 22.33, "elapsed_time": "3:14:51", "remaining_time": "11:17:41"} +{"current_steps": 905, "total_steps": 4048, "loss": 0.4004632234573364, "lr": 1.840409196407713e-05, "epoch": 0.4473001359199308, "percentage": 22.36, "elapsed_time": "3:15:04", "remaining_time": "11:17:27"} +{"current_steps": 906, "total_steps": 4048, "loss": 0.3464478850364685, "lr": 1.8399661087518784e-05, "epoch": 0.447794390213765, "percentage": 22.38, "elapsed_time": "3:15:17", "remaining_time": "11:17:14"} +{"current_steps": 907, "total_steps": 4048, "loss": 0.38161879777908325, "lr": 1.839522460346671e-05, "epoch": 0.44828864450759914, "percentage": 22.41, "elapsed_time": "3:15:29", "remaining_time": "11:17:00"} +{"current_steps": 908, "total_steps": 4048, "loss": 0.3307412266731262, "lr": 1.839078251488265e-05, "epoch": 0.44878289880143335, "percentage": 22.43, "elapsed_time": "3:15:42", "remaining_time": "11:16:48"} +{"current_steps": 909, "total_steps": 4048, "loss": 0.3238945007324219, "lr": 1.838633482473207e-05, "epoch": 0.4492771530952675, "percentage": 22.46, "elapsed_time": "3:15:55", "remaining_time": "11:16:34"} +{"current_steps": 910, "total_steps": 4048, "loss": 0.37863802909851074, "lr": 1.8381881535984186e-05, "epoch": 0.4497714073891017, "percentage": 22.48, "elapsed_time": "3:16:08", "remaining_time": "11:16:20"} +{"current_steps": 911, "total_steps": 4048, "loss": 0.35920199751853943, "lr": 1.8377422651611955e-05, "epoch": 0.45026566168293586, "percentage": 22.5, "elapsed_time": "3:16:20", "remaining_time": "11:16:05"} +{"current_steps": 912, "total_steps": 4048, "loss": 0.3913283050060272, "lr": 1.8372958174592054e-05, "epoch": 0.45075991597677006, "percentage": 22.53, "elapsed_time": "3:16:33", "remaining_time": "11:15:52"} +{"current_steps": 913, "total_steps": 4048, "loss": 0.32950836420059204, 
"lr": 1.8368488107904916e-05, "epoch": 0.4512541702706042, "percentage": 22.55, "elapsed_time": "3:16:45", "remaining_time": "11:15:37"} +{"current_steps": 914, "total_steps": 4048, "loss": 0.30557066202163696, "lr": 1.8364012454534687e-05, "epoch": 0.4517484245644384, "percentage": 22.58, "elapsed_time": "3:16:58", "remaining_time": "11:15:22"} +{"current_steps": 915, "total_steps": 4048, "loss": 0.3280435800552368, "lr": 1.835953121746925e-05, "epoch": 0.4522426788582726, "percentage": 22.6, "elapsed_time": "3:17:11", "remaining_time": "11:15:10"} +{"current_steps": 916, "total_steps": 4048, "loss": 0.323611319065094, "lr": 1.835504439970021e-05, "epoch": 0.4527369331521068, "percentage": 22.63, "elapsed_time": "3:17:23", "remaining_time": "11:14:56"} +{"current_steps": 917, "total_steps": 4048, "loss": 0.3794775605201721, "lr": 1.835055200422292e-05, "epoch": 0.45323118744594093, "percentage": 22.65, "elapsed_time": "3:17:36", "remaining_time": "11:14:43"} +{"current_steps": 918, "total_steps": 4048, "loss": 0.3437816798686981, "lr": 1.8346054034036418e-05, "epoch": 0.45372544173977514, "percentage": 22.68, "elapsed_time": "3:17:49", "remaining_time": "11:14:29"} +{"current_steps": 919, "total_steps": 4048, "loss": 0.40312957763671875, "lr": 1.8341550492143497e-05, "epoch": 0.4542196960336093, "percentage": 22.7, "elapsed_time": "3:18:02", "remaining_time": "11:14:18"} +{"current_steps": 920, "total_steps": 4048, "loss": 0.33988016843795776, "lr": 1.833704138155065e-05, "epoch": 0.4547139503274435, "percentage": 22.73, "elapsed_time": "3:18:16", "remaining_time": "11:14:06"} +{"current_steps": 921, "total_steps": 4048, "loss": 0.30893969535827637, "lr": 1.83325267052681e-05, "epoch": 0.45520820462127765, "percentage": 22.75, "elapsed_time": "3:18:29", "remaining_time": "11:13:55"} +{"current_steps": 922, "total_steps": 4048, "loss": 0.3351095914840698, "lr": 1.832800646630978e-05, "epoch": 0.45570245891511185, "percentage": 22.78, "elapsed_time": "3:18:42", 
"remaining_time": "11:13:42"} +{"current_steps": 923, "total_steps": 4048, "loss": 0.3235122561454773, "lr": 1.8323480667693335e-05, "epoch": 0.456196713208946, "percentage": 22.8, "elapsed_time": "3:18:55", "remaining_time": "11:13:31"} +{"current_steps": 924, "total_steps": 4048, "loss": 0.3482256531715393, "lr": 1.8318949312440126e-05, "epoch": 0.45669096750278015, "percentage": 22.83, "elapsed_time": "3:19:08", "remaining_time": "11:13:18"} +{"current_steps": 925, "total_steps": 4048, "loss": 0.3577580451965332, "lr": 1.831441240357522e-05, "epoch": 0.45718522179661436, "percentage": 22.85, "elapsed_time": "3:19:21", "remaining_time": "11:13:05"} +{"current_steps": 926, "total_steps": 4048, "loss": 0.34081172943115234, "lr": 1.8309869944127386e-05, "epoch": 0.4576794760904485, "percentage": 22.88, "elapsed_time": "3:19:34", "remaining_time": "11:12:51"} +{"current_steps": 927, "total_steps": 4048, "loss": 0.4041389524936676, "lr": 1.8305321937129118e-05, "epoch": 0.4581737303842827, "percentage": 22.9, "elapsed_time": "3:19:47", "remaining_time": "11:12:38"} +{"current_steps": 928, "total_steps": 4048, "loss": 0.3014240562915802, "lr": 1.830076838561659e-05, "epoch": 0.45866798467811687, "percentage": 22.92, "elapsed_time": "3:19:59", "remaining_time": "11:12:23"} +{"current_steps": 929, "total_steps": 4048, "loss": 0.3105698823928833, "lr": 1.829620929262969e-05, "epoch": 0.4591622389719511, "percentage": 22.95, "elapsed_time": "3:20:12", "remaining_time": "11:12:10"} +{"current_steps": 930, "total_steps": 4048, "loss": 0.36114832758903503, "lr": 1.8291644661212008e-05, "epoch": 0.4596564932657852, "percentage": 22.97, "elapsed_time": "3:20:25", "remaining_time": "11:11:56"} +{"current_steps": 931, "total_steps": 4048, "loss": 0.33738240599632263, "lr": 1.828707449441082e-05, "epoch": 0.46015074755961943, "percentage": 23.0, "elapsed_time": "3:20:37", "remaining_time": "11:11:42"} +{"current_steps": 932, "total_steps": 4048, "loss": 0.3455100655555725, "lr": 
1.8282498795277108e-05, "epoch": 0.4606450018534536, "percentage": 23.02, "elapsed_time": "3:20:50", "remaining_time": "11:11:27"} +{"current_steps": 933, "total_steps": 4048, "loss": 0.3622395992279053, "lr": 1.8277917566865544e-05, "epoch": 0.4611392561472878, "percentage": 23.05, "elapsed_time": "3:21:03", "remaining_time": "11:11:14"} +{"current_steps": 934, "total_steps": 4048, "loss": 0.36942192912101746, "lr": 1.8273330812234488e-05, "epoch": 0.46163351044112194, "percentage": 23.07, "elapsed_time": "3:21:15", "remaining_time": "11:11:01"} +{"current_steps": 935, "total_steps": 4048, "loss": 0.33603039383888245, "lr": 1.8268738534445996e-05, "epoch": 0.46212776473495615, "percentage": 23.1, "elapsed_time": "3:21:28", "remaining_time": "11:10:46"} +{"current_steps": 936, "total_steps": 4048, "loss": 0.34806567430496216, "lr": 1.82641407365658e-05, "epoch": 0.4626220190287903, "percentage": 23.12, "elapsed_time": "3:21:41", "remaining_time": "11:10:34"} +{"current_steps": 937, "total_steps": 4048, "loss": 0.35512328147888184, "lr": 1.8259537421663333e-05, "epoch": 0.4631162733226245, "percentage": 23.15, "elapsed_time": "3:21:53", "remaining_time": "11:10:20"} +{"current_steps": 938, "total_steps": 4048, "loss": 0.33349719643592834, "lr": 1.8254928592811695e-05, "epoch": 0.46361052761645866, "percentage": 23.17, "elapsed_time": "3:22:07", "remaining_time": "11:10:10"} +{"current_steps": 939, "total_steps": 4048, "loss": 0.3510274887084961, "lr": 1.8250314253087677e-05, "epoch": 0.46410478191029286, "percentage": 23.2, "elapsed_time": "3:22:20", "remaining_time": "11:09:57"} +{"current_steps": 940, "total_steps": 4048, "loss": 0.35831883549690247, "lr": 1.824569440557175e-05, "epoch": 0.464599036204127, "percentage": 23.22, "elapsed_time": "3:22:34", "remaining_time": "11:09:46"} +{"current_steps": 941, "total_steps": 4048, "loss": 0.353208065032959, "lr": 1.824106905334805e-05, "epoch": 0.4650932904979612, "percentage": 23.25, "elapsed_time": "3:22:47", 
"remaining_time": "11:09:33"} +{"current_steps": 942, "total_steps": 4048, "loss": 0.3335849642753601, "lr": 1.8236438199504402e-05, "epoch": 0.46558754479179537, "percentage": 23.27, "elapsed_time": "3:23:00", "remaining_time": "11:09:22"} +{"current_steps": 943, "total_steps": 4048, "loss": 0.346247136592865, "lr": 1.8231801847132294e-05, "epoch": 0.4660817990856296, "percentage": 23.3, "elapsed_time": "3:23:13", "remaining_time": "11:09:09"} +{"current_steps": 944, "total_steps": 4048, "loss": 0.35125380754470825, "lr": 1.8227159999326895e-05, "epoch": 0.46657605337946373, "percentage": 23.32, "elapsed_time": "3:23:27", "remaining_time": "11:08:58"} +{"current_steps": 945, "total_steps": 4048, "loss": 0.34262675046920776, "lr": 1.822251265918703e-05, "epoch": 0.46707030767329794, "percentage": 23.34, "elapsed_time": "3:23:40", "remaining_time": "11:08:45"} +{"current_steps": 946, "total_steps": 4048, "loss": 0.3437168598175049, "lr": 1.82178598298152e-05, "epoch": 0.4675645619671321, "percentage": 23.37, "elapsed_time": "3:23:52", "remaining_time": "11:08:32"} +{"current_steps": 947, "total_steps": 4048, "loss": 0.35729774832725525, "lr": 1.8213201514317565e-05, "epoch": 0.4680588162609663, "percentage": 23.39, "elapsed_time": "3:24:06", "remaining_time": "11:08:21"} +{"current_steps": 948, "total_steps": 4048, "loss": 0.36507898569107056, "lr": 1.8208537715803954e-05, "epoch": 0.46855307055480044, "percentage": 23.42, "elapsed_time": "3:24:19", "remaining_time": "11:08:09"} +{"current_steps": 949, "total_steps": 4048, "loss": 0.363017737865448, "lr": 1.8203868437387847e-05, "epoch": 0.46904732484863465, "percentage": 23.44, "elapsed_time": "3:24:33", "remaining_time": "11:07:59"} +{"current_steps": 950, "total_steps": 4048, "loss": 0.3645821511745453, "lr": 1.8199193682186388e-05, "epoch": 0.4695415791424688, "percentage": 23.47, "elapsed_time": "3:24:46", "remaining_time": "11:07:46"} +{"current_steps": 951, "total_steps": 4048, "loss": 0.3054324686527252, 
"lr": 1.8194513453320387e-05, "epoch": 0.47003583343630295, "percentage": 23.49, "elapsed_time": "3:24:59", "remaining_time": "11:07:34"} +{"current_steps": 952, "total_steps": 4048, "loss": 0.35003694891929626, "lr": 1.8189827753914282e-05, "epoch": 0.47053008773013716, "percentage": 23.52, "elapsed_time": "3:25:12", "remaining_time": "11:07:21"} +{"current_steps": 953, "total_steps": 4048, "loss": 0.37834814190864563, "lr": 1.8185136587096193e-05, "epoch": 0.4710243420239713, "percentage": 23.54, "elapsed_time": "3:25:25", "remaining_time": "11:07:09"} +{"current_steps": 954, "total_steps": 4048, "loss": 0.3369285464286804, "lr": 1.8180439955997867e-05, "epoch": 0.4715185963178055, "percentage": 23.57, "elapsed_time": "3:25:38", "remaining_time": "11:06:55"} +{"current_steps": 955, "total_steps": 4048, "loss": 0.3612895905971527, "lr": 1.8175737863754706e-05, "epoch": 0.47201285061163967, "percentage": 23.59, "elapsed_time": "3:25:51", "remaining_time": "11:06:41"} +{"current_steps": 956, "total_steps": 4048, "loss": 0.34393271803855896, "lr": 1.817103031350577e-05, "epoch": 0.4725071049054739, "percentage": 23.62, "elapsed_time": "3:26:03", "remaining_time": "11:06:27"} +{"current_steps": 957, "total_steps": 4048, "loss": 0.3824620544910431, "lr": 1.8166317308393745e-05, "epoch": 0.473001359199308, "percentage": 23.64, "elapsed_time": "3:26:16", "remaining_time": "11:06:13"} +{"current_steps": 958, "total_steps": 4048, "loss": 0.3092145621776581, "lr": 1.816159885156497e-05, "epoch": 0.47349561349314223, "percentage": 23.67, "elapsed_time": "3:26:29", "remaining_time": "11:06:02"} +{"current_steps": 959, "total_steps": 4048, "loss": 0.3328183889389038, "lr": 1.8156874946169414e-05, "epoch": 0.4739898677869764, "percentage": 23.69, "elapsed_time": "3:26:42", "remaining_time": "11:05:50"} +{"current_steps": 960, "total_steps": 4048, "loss": 0.3715244233608246, "lr": 1.815214559536069e-05, "epoch": 0.4744841220808106, "percentage": 23.72, "elapsed_time": "3:26:56", 
"remaining_time": "11:05:39"} +{"current_steps": 961, "total_steps": 4048, "loss": 0.31065690517425537, "lr": 1.814741080229605e-05, "epoch": 0.47497837637464474, "percentage": 23.74, "elapsed_time": "3:27:09", "remaining_time": "11:05:25"} +{"current_steps": 962, "total_steps": 4048, "loss": 0.3632475733757019, "lr": 1.814267057013637e-05, "epoch": 0.47547263066847895, "percentage": 23.76, "elapsed_time": "3:27:22", "remaining_time": "11:05:13"} +{"current_steps": 963, "total_steps": 4048, "loss": 0.3367992043495178, "lr": 1.813792490204616e-05, "epoch": 0.4759668849623131, "percentage": 23.79, "elapsed_time": "3:27:35", "remaining_time": "11:05:01"} +{"current_steps": 964, "total_steps": 4048, "loss": 0.37678295373916626, "lr": 1.813317380119356e-05, "epoch": 0.4764611392561473, "percentage": 23.81, "elapsed_time": "3:27:48", "remaining_time": "11:04:49"} +{"current_steps": 965, "total_steps": 4048, "loss": 0.31454166769981384, "lr": 1.8128417270750342e-05, "epoch": 0.47695539354998145, "percentage": 23.84, "elapsed_time": "3:28:01", "remaining_time": "11:04:37"} +{"current_steps": 966, "total_steps": 4048, "loss": 0.38495004177093506, "lr": 1.81236553138919e-05, "epoch": 0.47744964784381566, "percentage": 23.86, "elapsed_time": "3:28:15", "remaining_time": "11:04:26"} +{"current_steps": 967, "total_steps": 4048, "loss": 0.3867315948009491, "lr": 1.8118887933797237e-05, "epoch": 0.4779439021376498, "percentage": 23.89, "elapsed_time": "3:28:28", "remaining_time": "11:04:14"} +{"current_steps": 968, "total_steps": 4048, "loss": 0.3453156650066376, "lr": 1.8114115133648996e-05, "epoch": 0.478438156431484, "percentage": 23.91, "elapsed_time": "3:28:41", "remaining_time": "11:04:01"} +{"current_steps": 969, "total_steps": 4048, "loss": 0.34461456537246704, "lr": 1.8109336916633426e-05, "epoch": 0.47893241072531817, "percentage": 23.94, "elapsed_time": "3:28:54", "remaining_time": "11:03:49"} +{"current_steps": 970, "total_steps": 4048, "loss": 0.36489856243133545, 
"lr": 1.8104553285940404e-05, "epoch": 0.4794266650191524, "percentage": 23.96, "elapsed_time": "3:29:07", "remaining_time": "11:03:37"} +{"current_steps": 971, "total_steps": 4048, "loss": 0.3596840500831604, "lr": 1.80997642447634e-05, "epoch": 0.4799209193129865, "percentage": 23.99, "elapsed_time": "3:29:21", "remaining_time": "11:03:26"} +{"current_steps": 972, "total_steps": 4048, "loss": 0.3856956362724304, "lr": 1.8094969796299527e-05, "epoch": 0.48041517360682073, "percentage": 24.01, "elapsed_time": "3:29:34", "remaining_time": "11:03:13"} +{"current_steps": 973, "total_steps": 4048, "loss": 0.3235170245170593, "lr": 1.8090169943749477e-05, "epoch": 0.4809094279006549, "percentage": 24.04, "elapsed_time": "3:29:48", "remaining_time": "11:03:02"} +{"current_steps": 974, "total_steps": 4048, "loss": 0.28033584356307983, "lr": 1.8085364690317564e-05, "epoch": 0.4814036821944891, "percentage": 24.06, "elapsed_time": "3:30:01", "remaining_time": "11:02:50"} +{"current_steps": 975, "total_steps": 4048, "loss": 0.3279935419559479, "lr": 1.808055403921171e-05, "epoch": 0.48189793648832324, "percentage": 24.09, "elapsed_time": "3:30:15", "remaining_time": "11:02:40"} +{"current_steps": 976, "total_steps": 4048, "loss": 0.36426058411598206, "lr": 1.8075737993643442e-05, "epoch": 0.4823921907821574, "percentage": 24.11, "elapsed_time": "3:30:28", "remaining_time": "11:02:27"} +{"current_steps": 977, "total_steps": 4048, "loss": 0.3720256984233856, "lr": 1.8070916556827876e-05, "epoch": 0.4828864450759916, "percentage": 24.14, "elapsed_time": "3:30:41", "remaining_time": "11:02:16"} +{"current_steps": 978, "total_steps": 4048, "loss": 0.3299727439880371, "lr": 1.8066089731983735e-05, "epoch": 0.48338069936982575, "percentage": 24.16, "elapsed_time": "3:30:54", "remaining_time": "11:02:03"} +{"current_steps": 979, "total_steps": 4048, "loss": 0.3425888419151306, "lr": 1.8061257522333338e-05, "epoch": 0.48387495366365996, "percentage": 24.18, "elapsed_time": "3:31:08", 
"remaining_time": "11:01:52"} +{"current_steps": 980, "total_steps": 4048, "loss": 0.34109392762184143, "lr": 1.80564199311026e-05, "epoch": 0.4843692079574941, "percentage": 24.21, "elapsed_time": "3:31:20", "remaining_time": "11:01:38"} +{"current_steps": 981, "total_steps": 4048, "loss": 0.29130926728248596, "lr": 1.805157696152103e-05, "epoch": 0.4848634622513283, "percentage": 24.23, "elapsed_time": "3:31:34", "remaining_time": "11:01:27"} +{"current_steps": 982, "total_steps": 4048, "loss": 0.36200815439224243, "lr": 1.8046728616821726e-05, "epoch": 0.48535771654516247, "percentage": 24.26, "elapsed_time": "3:31:47", "remaining_time": "11:01:14"} +{"current_steps": 983, "total_steps": 4048, "loss": 0.3343828320503235, "lr": 1.8041874900241368e-05, "epoch": 0.48585197083899667, "percentage": 24.28, "elapsed_time": "3:32:00", "remaining_time": "11:01:03"} +{"current_steps": 984, "total_steps": 4048, "loss": 0.32942160964012146, "lr": 1.803701581502023e-05, "epoch": 0.4863462251328308, "percentage": 24.31, "elapsed_time": "3:32:13", "remaining_time": "11:00:50"} +{"current_steps": 985, "total_steps": 4048, "loss": 0.34390491247177124, "lr": 1.803215136440217e-05, "epoch": 0.48684047942666503, "percentage": 24.33, "elapsed_time": "3:32:26", "remaining_time": "11:00:37"} +{"current_steps": 986, "total_steps": 4048, "loss": 0.37723374366760254, "lr": 1.8027281551634622e-05, "epoch": 0.4873347337204992, "percentage": 24.36, "elapsed_time": "3:32:39", "remaining_time": "11:00:25"} +{"current_steps": 987, "total_steps": 4048, "loss": 0.3493693470954895, "lr": 1.802240637996861e-05, "epoch": 0.4878289880143334, "percentage": 24.38, "elapsed_time": "3:32:52", "remaining_time": "11:00:11"} +{"current_steps": 988, "total_steps": 4048, "loss": 0.3564317524433136, "lr": 1.8017525852658723e-05, "epoch": 0.48832324230816754, "percentage": 24.41, "elapsed_time": "3:33:05", "remaining_time": "11:00:00"} +{"current_steps": 989, "total_steps": 4048, "loss": 0.36572349071502686, 
"lr": 1.8012639972963136e-05, "epoch": 0.48881749660200174, "percentage": 24.43, "elapsed_time": "3:33:18", "remaining_time": "10:59:47"} +{"current_steps": 990, "total_steps": 4048, "loss": 0.31457674503326416, "lr": 1.8007748744143586e-05, "epoch": 0.4893117508958359, "percentage": 24.46, "elapsed_time": "3:33:32", "remaining_time": "10:59:36"} +{"current_steps": 991, "total_steps": 4048, "loss": 0.36191096901893616, "lr": 1.8002852169465393e-05, "epoch": 0.4898060051896701, "percentage": 24.48, "elapsed_time": "3:33:45", "remaining_time": "10:59:23"} +{"current_steps": 992, "total_steps": 4048, "loss": 0.33284491300582886, "lr": 1.799795025219744e-05, "epoch": 0.49030025948350425, "percentage": 24.51, "elapsed_time": "3:33:58", "remaining_time": "10:59:11"} +{"current_steps": 993, "total_steps": 4048, "loss": 0.3101437985897064, "lr": 1.7993042995612172e-05, "epoch": 0.49079451377733846, "percentage": 24.53, "elapsed_time": "3:34:11", "remaining_time": "10:58:58"} +{"current_steps": 994, "total_steps": 4048, "loss": 0.3196948170661926, "lr": 1.7988130402985608e-05, "epoch": 0.4912887680711726, "percentage": 24.56, "elapsed_time": "3:34:25", "remaining_time": "10:58:47"} +{"current_steps": 995, "total_steps": 4048, "loss": 0.3757585883140564, "lr": 1.7983212477597325e-05, "epoch": 0.4917830223650068, "percentage": 24.58, "elapsed_time": "3:34:38", "remaining_time": "10:58:34"} +{"current_steps": 996, "total_steps": 4048, "loss": 0.3949659466743469, "lr": 1.7978289222730454e-05, "epoch": 0.49227727665884097, "percentage": 24.6, "elapsed_time": "3:34:51", "remaining_time": "10:58:23"} +{"current_steps": 997, "total_steps": 4048, "loss": 0.3490184545516968, "lr": 1.79733606416717e-05, "epoch": 0.4927715309526752, "percentage": 24.63, "elapsed_time": "3:35:04", "remaining_time": "10:58:10"} +{"current_steps": 998, "total_steps": 4048, "loss": 0.32302743196487427, "lr": 1.7968426737711304e-05, "epoch": 0.4932657852465093, "percentage": 24.65, "elapsed_time": 
"3:35:17", "remaining_time": "10:57:57"} +{"current_steps": 999, "total_steps": 4048, "loss": 0.4205089807510376, "lr": 1.7963487514143073e-05, "epoch": 0.49376003954034353, "percentage": 24.68, "elapsed_time": "3:35:31", "remaining_time": "10:57:46"} +{"current_steps": 1000, "total_steps": 4048, "loss": 0.30787885189056396, "lr": 1.7958542974264363e-05, "epoch": 0.4942542938341777, "percentage": 24.7, "elapsed_time": "3:35:43", "remaining_time": "10:57:32"} +{"current_steps": 1001, "total_steps": 4048, "loss": 0.3174916207790375, "lr": 1.7953593121376075e-05, "epoch": 0.4947485481280119, "percentage": 24.73, "elapsed_time": "3:36:04", "remaining_time": "10:57:43"} +{"current_steps": 1002, "total_steps": 4048, "loss": 0.330039381980896, "lr": 1.7948637958782662e-05, "epoch": 0.49524280242184604, "percentage": 24.75, "elapsed_time": "3:36:17", "remaining_time": "10:57:30"} +{"current_steps": 1003, "total_steps": 4048, "loss": 0.3362613320350647, "lr": 1.794367748979212e-05, "epoch": 0.4957370567156802, "percentage": 24.78, "elapsed_time": "3:36:31", "remaining_time": "10:57:20"} +{"current_steps": 1004, "total_steps": 4048, "loss": 0.3479865789413452, "lr": 1.793871171771599e-05, "epoch": 0.4962313110095144, "percentage": 24.8, "elapsed_time": "3:36:44", "remaining_time": "10:57:08"} +{"current_steps": 1005, "total_steps": 4048, "loss": 0.361303448677063, "lr": 1.7933740645869345e-05, "epoch": 0.49672556530334855, "percentage": 24.83, "elapsed_time": "3:36:58", "remaining_time": "10:56:57"} +{"current_steps": 1006, "total_steps": 4048, "loss": 0.32340794801712036, "lr": 1.79287642775708e-05, "epoch": 0.49721981959718276, "percentage": 24.85, "elapsed_time": "3:37:11", "remaining_time": "10:56:45"} +{"current_steps": 1007, "total_steps": 4048, "loss": 0.3410148620605469, "lr": 1.792378261614252e-05, "epoch": 0.4977140738910169, "percentage": 24.88, "elapsed_time": "3:37:24", "remaining_time": "10:56:34"} +{"current_steps": 1008, "total_steps": 4048, "loss": 
0.3332127034664154, "lr": 1.791879566491018e-05, "epoch": 0.4982083281848511, "percentage": 24.9, "elapsed_time": "3:37:38", "remaining_time": "10:56:21"} +{"current_steps": 1009, "total_steps": 4048, "loss": 0.36532774567604065, "lr": 1.7913803427202998e-05, "epoch": 0.49870258247868526, "percentage": 24.93, "elapsed_time": "3:37:51", "remaining_time": "10:56:09"} +{"current_steps": 1010, "total_steps": 4048, "loss": 0.3721959888935089, "lr": 1.7908805906353725e-05, "epoch": 0.49919683677251947, "percentage": 24.95, "elapsed_time": "3:38:04", "remaining_time": "10:55:57"} +{"current_steps": 1011, "total_steps": 4048, "loss": 0.3406672477722168, "lr": 1.7903803105698627e-05, "epoch": 0.4996910910663536, "percentage": 24.98, "elapsed_time": "3:38:17", "remaining_time": "10:55:43"} +{"current_steps": 1012, "total_steps": 4048, "loss": 0.323926717042923, "lr": 1.789879502857751e-05, "epoch": 0.5001853453601878, "percentage": 25.0, "elapsed_time": "3:38:30", "remaining_time": "10:55:32"} +{"current_steps": 1013, "total_steps": 4048, "loss": 0.36245018243789673, "lr": 1.7893781678333694e-05, "epoch": 0.500679599654022, "percentage": 25.02, "elapsed_time": "3:38:43", "remaining_time": "10:55:19"} +{"current_steps": 1014, "total_steps": 4048, "loss": 0.36145877838134766, "lr": 1.7888763058314016e-05, "epoch": 0.5011738539478562, "percentage": 25.05, "elapsed_time": "3:38:56", "remaining_time": "10:55:06"} +{"current_steps": 1015, "total_steps": 4048, "loss": 0.31398001313209534, "lr": 1.788373917186884e-05, "epoch": 0.5016681082416904, "percentage": 25.07, "elapsed_time": "3:39:09", "remaining_time": "10:54:51"} +{"current_steps": 1016, "total_steps": 4048, "loss": 0.36732447147369385, "lr": 1.7878710022352033e-05, "epoch": 0.5021623625355245, "percentage": 25.1, "elapsed_time": "3:39:21", "remaining_time": "10:54:38"} +{"current_steps": 1017, "total_steps": 4048, "loss": 0.3336929678916931, "lr": 1.787367561312099e-05, "epoch": 0.5026566168293587, "percentage": 25.12, 
"elapsed_time": "3:39:34", "remaining_time": "10:54:23"} +{"current_steps": 1018, "total_steps": 4048, "loss": 0.33306068181991577, "lr": 1.786863594753661e-05, "epoch": 0.5031508711231929, "percentage": 25.15, "elapsed_time": "3:39:46", "remaining_time": "10:54:09"} +{"current_steps": 1019, "total_steps": 4048, "loss": 0.32577213644981384, "lr": 1.7863591028963297e-05, "epoch": 0.5036451254170271, "percentage": 25.17, "elapsed_time": "3:39:59", "remaining_time": "10:53:54"} +{"current_steps": 1020, "total_steps": 4048, "loss": 0.33542972803115845, "lr": 1.7858540860768974e-05, "epoch": 0.5041393797108612, "percentage": 25.2, "elapsed_time": "3:40:11", "remaining_time": "10:53:39"} +{"current_steps": 1021, "total_steps": 4048, "loss": 0.3075249195098877, "lr": 1.7853485446325055e-05, "epoch": 0.5046336340046954, "percentage": 25.22, "elapsed_time": "3:40:24", "remaining_time": "10:53:25"} +{"current_steps": 1022, "total_steps": 4048, "loss": 0.3473510146141052, "lr": 1.7848424789006466e-05, "epoch": 0.5051278882985296, "percentage": 25.25, "elapsed_time": "3:40:36", "remaining_time": "10:53:10"} +{"current_steps": 1023, "total_steps": 4048, "loss": 0.3543929159641266, "lr": 1.784335889219163e-05, "epoch": 0.5056221425923638, "percentage": 25.27, "elapsed_time": "3:40:49", "remaining_time": "10:52:57"} +{"current_steps": 1024, "total_steps": 4048, "loss": 0.3198593556880951, "lr": 1.783828775926246e-05, "epoch": 0.5061163968861979, "percentage": 25.3, "elapsed_time": "3:41:01", "remaining_time": "10:52:43"} +{"current_steps": 1025, "total_steps": 4048, "loss": 0.34223973751068115, "lr": 1.783321139360438e-05, "epoch": 0.5066106511800321, "percentage": 25.32, "elapsed_time": "3:41:15", "remaining_time": "10:52:31"} +{"current_steps": 1026, "total_steps": 4048, "loss": 0.3895387351512909, "lr": 1.78281297986063e-05, "epoch": 0.5071049054738663, "percentage": 25.35, "elapsed_time": "3:41:27", "remaining_time": "10:52:18"} +{"current_steps": 1027, "total_steps": 4048, 
"loss": 0.35764580965042114, "lr": 1.782304297766061e-05, "epoch": 0.5075991597677005, "percentage": 25.37, "elapsed_time": "3:41:41", "remaining_time": "10:52:06"} +{"current_steps": 1028, "total_steps": 4048, "loss": 0.30859488248825073, "lr": 1.7817950934163213e-05, "epoch": 0.5080934140615346, "percentage": 25.4, "elapsed_time": "3:41:54", "remaining_time": "10:51:53"} +{"current_steps": 1029, "total_steps": 4048, "loss": 0.3554389476776123, "lr": 1.7812853671513472e-05, "epoch": 0.5085876683553688, "percentage": 25.42, "elapsed_time": "3:42:07", "remaining_time": "10:51:41"} +{"current_steps": 1030, "total_steps": 4048, "loss": 0.3528766632080078, "lr": 1.7807751193114254e-05, "epoch": 0.509081922649203, "percentage": 25.44, "elapsed_time": "3:42:20", "remaining_time": "10:51:28"} +{"current_steps": 1031, "total_steps": 4048, "loss": 0.3645275831222534, "lr": 1.78026435023719e-05, "epoch": 0.5095761769430371, "percentage": 25.47, "elapsed_time": "3:42:33", "remaining_time": "10:51:14"} +{"current_steps": 1032, "total_steps": 4048, "loss": 0.3137075901031494, "lr": 1.779753060269623e-05, "epoch": 0.5100704312368713, "percentage": 25.49, "elapsed_time": "3:42:46", "remaining_time": "10:51:02"} +{"current_steps": 1033, "total_steps": 4048, "loss": 0.31993091106414795, "lr": 1.7792412497500538e-05, "epoch": 0.5105646855307056, "percentage": 25.52, "elapsed_time": "3:42:59", "remaining_time": "10:50:49"} +{"current_steps": 1034, "total_steps": 4048, "loss": 0.3514295220375061, "lr": 1.7787289190201606e-05, "epoch": 0.5110589398245398, "percentage": 25.54, "elapsed_time": "3:43:12", "remaining_time": "10:50:37"} +{"current_steps": 1035, "total_steps": 4048, "loss": 0.3167670667171478, "lr": 1.7782160684219677e-05, "epoch": 0.5115531941183739, "percentage": 25.57, "elapsed_time": "3:43:25", "remaining_time": "10:50:23"} +{"current_steps": 1036, "total_steps": 4048, "loss": 0.3298097252845764, "lr": 1.7777026982978473e-05, "epoch": 0.5120474484122081, "percentage": 
25.59, "elapsed_time": "3:43:38", "remaining_time": "10:50:12"} +{"current_steps": 1037, "total_steps": 4048, "loss": 0.3334948420524597, "lr": 1.777188808990517e-05, "epoch": 0.5125417027060423, "percentage": 25.62, "elapsed_time": "3:43:51", "remaining_time": "10:49:59"} +{"current_steps": 1038, "total_steps": 4048, "loss": 0.3705115020275116, "lr": 1.776674400843043e-05, "epoch": 0.5130359569998765, "percentage": 25.64, "elapsed_time": "3:44:05", "remaining_time": "10:49:49"} +{"current_steps": 1039, "total_steps": 4048, "loss": 0.3586978614330292, "lr": 1.7761594741988356e-05, "epoch": 0.5135302112937106, "percentage": 25.67, "elapsed_time": "3:44:18", "remaining_time": "10:49:35"} +{"current_steps": 1040, "total_steps": 4048, "loss": 0.3105466663837433, "lr": 1.7756440294016535e-05, "epoch": 0.5140244655875448, "percentage": 25.69, "elapsed_time": "3:44:31", "remaining_time": "10:49:23"} +{"current_steps": 1041, "total_steps": 4048, "loss": 0.35213470458984375, "lr": 1.7751280667956002e-05, "epoch": 0.514518719881379, "percentage": 25.72, "elapsed_time": "3:44:44", "remaining_time": "10:49:10"} +{"current_steps": 1042, "total_steps": 4048, "loss": 0.3830525875091553, "lr": 1.7746115867251245e-05, "epoch": 0.5150129741752132, "percentage": 25.74, "elapsed_time": "3:44:57", "remaining_time": "10:48:58"} +{"current_steps": 1043, "total_steps": 4048, "loss": 0.34106165170669556, "lr": 1.7740945895350215e-05, "epoch": 0.5155072284690473, "percentage": 25.77, "elapsed_time": "3:45:10", "remaining_time": "10:48:44"} +{"current_steps": 1044, "total_steps": 4048, "loss": 0.33408549427986145, "lr": 1.773577075570431e-05, "epoch": 0.5160014827628815, "percentage": 25.79, "elapsed_time": "3:45:23", "remaining_time": "10:48:33"} +{"current_steps": 1045, "total_steps": 4048, "loss": 0.32823115587234497, "lr": 1.7730590451768375e-05, "epoch": 0.5164957370567157, "percentage": 25.82, "elapsed_time": "3:45:36", "remaining_time": "10:48:19"} +{"current_steps": 1046, 
"total_steps": 4048, "loss": 0.2866591811180115, "lr": 1.7725404987000716e-05, "epoch": 0.5169899913505499, "percentage": 25.84, "elapsed_time": "3:45:50", "remaining_time": "10:48:08"} +{"current_steps": 1047, "total_steps": 4048, "loss": 0.34053099155426025, "lr": 1.772021436486307e-05, "epoch": 0.517484245644384, "percentage": 25.86, "elapsed_time": "3:46:02", "remaining_time": "10:47:55"} +{"current_steps": 1048, "total_steps": 4048, "loss": 0.30379486083984375, "lr": 1.771501858882062e-05, "epoch": 0.5179784999382182, "percentage": 25.89, "elapsed_time": "3:46:15", "remaining_time": "10:47:41"} +{"current_steps": 1049, "total_steps": 4048, "loss": 0.37569302320480347, "lr": 1.7709817662341998e-05, "epoch": 0.5184727542320524, "percentage": 25.91, "elapsed_time": "3:46:28", "remaining_time": "10:47:29"} +{"current_steps": 1050, "total_steps": 4048, "loss": 0.31770390272140503, "lr": 1.770461158889926e-05, "epoch": 0.5189670085258866, "percentage": 25.94, "elapsed_time": "3:46:41", "remaining_time": "10:47:16"} +{"current_steps": 1051, "total_steps": 4048, "loss": 0.34175002574920654, "lr": 1.769940037196791e-05, "epoch": 0.5194612628197207, "percentage": 25.96, "elapsed_time": "3:46:55", "remaining_time": "10:47:04"} +{"current_steps": 1052, "total_steps": 4048, "loss": 0.3634580671787262, "lr": 1.769418401502689e-05, "epoch": 0.5199555171135549, "percentage": 25.99, "elapsed_time": "3:47:07", "remaining_time": "10:46:51"} +{"current_steps": 1053, "total_steps": 4048, "loss": 0.3631044030189514, "lr": 1.7688962521558554e-05, "epoch": 0.5204497714073891, "percentage": 26.01, "elapsed_time": "3:47:21", "remaining_time": "10:46:39"} +{"current_steps": 1054, "total_steps": 4048, "loss": 0.3402160704135895, "lr": 1.7683735895048698e-05, "epoch": 0.5209440257012233, "percentage": 26.04, "elapsed_time": "3:47:34", "remaining_time": "10:46:25"} +{"current_steps": 1055, "total_steps": 4048, "loss": 0.3895665407180786, "lr": 1.7678504138986548e-05, "epoch": 
0.5214382799950574, "percentage": 26.06, "elapsed_time": "3:47:47", "remaining_time": "10:46:14"} +{"current_steps": 1056, "total_steps": 4048, "loss": 0.32207030057907104, "lr": 1.767326725686475e-05, "epoch": 0.5219325342888916, "percentage": 26.09, "elapsed_time": "3:48:00", "remaining_time": "10:46:01"} +{"current_steps": 1057, "total_steps": 4048, "loss": 0.33095866441726685, "lr": 1.7668025252179363e-05, "epoch": 0.5224267885827258, "percentage": 26.11, "elapsed_time": "3:48:13", "remaining_time": "10:45:48"} +{"current_steps": 1058, "total_steps": 4048, "loss": 0.33239442110061646, "lr": 1.7662778128429883e-05, "epoch": 0.5229210428765599, "percentage": 26.14, "elapsed_time": "3:48:26", "remaining_time": "10:45:35"} +{"current_steps": 1059, "total_steps": 4048, "loss": 0.27432021498680115, "lr": 1.7657525889119212e-05, "epoch": 0.5234152971703941, "percentage": 26.16, "elapsed_time": "3:48:39", "remaining_time": "10:45:23"} +{"current_steps": 1060, "total_steps": 4048, "loss": 0.3221333622932434, "lr": 1.7652268537753672e-05, "epoch": 0.5239095514642284, "percentage": 26.19, "elapsed_time": "3:48:52", "remaining_time": "10:45:09"} +{"current_steps": 1061, "total_steps": 4048, "loss": 0.3126341700553894, "lr": 1.764700607784299e-05, "epoch": 0.5244038057580626, "percentage": 26.21, "elapsed_time": "3:49:05", "remaining_time": "10:44:57"} +{"current_steps": 1062, "total_steps": 4048, "loss": 0.33239883184432983, "lr": 1.7641738512900315e-05, "epoch": 0.5248980600518967, "percentage": 26.24, "elapsed_time": "3:49:18", "remaining_time": "10:44:45"} +{"current_steps": 1063, "total_steps": 4048, "loss": 0.30075010657310486, "lr": 1.7636465846442197e-05, "epoch": 0.5253923143457309, "percentage": 26.26, "elapsed_time": "3:49:31", "remaining_time": "10:44:32"} +{"current_steps": 1064, "total_steps": 4048, "loss": 0.3577713370323181, "lr": 1.763118808198859e-05, "epoch": 0.5258865686395651, "percentage": 26.28, "elapsed_time": "3:49:45", "remaining_time": "10:44:21"} 
+{"current_steps": 1065, "total_steps": 4048, "loss": 0.3483964204788208, "lr": 1.7625905223062858e-05, "epoch": 0.5263808229333993, "percentage": 26.31, "elapsed_time": "3:49:58", "remaining_time": "10:44:09"} +{"current_steps": 1066, "total_steps": 4048, "loss": 0.3622454106807709, "lr": 1.762061727319176e-05, "epoch": 0.5268750772272334, "percentage": 26.33, "elapsed_time": "3:50:12", "remaining_time": "10:43:57"} +{"current_steps": 1067, "total_steps": 4048, "loss": 0.35156917572021484, "lr": 1.761532423590545e-05, "epoch": 0.5273693315210676, "percentage": 26.36, "elapsed_time": "3:50:25", "remaining_time": "10:43:45"} +{"current_steps": 1068, "total_steps": 4048, "loss": 0.3413820266723633, "lr": 1.7610026114737498e-05, "epoch": 0.5278635858149018, "percentage": 26.38, "elapsed_time": "3:50:39", "remaining_time": "10:43:34"} +{"current_steps": 1069, "total_steps": 4048, "loss": 0.3707934021949768, "lr": 1.760472291322484e-05, "epoch": 0.528357840108736, "percentage": 26.41, "elapsed_time": "3:50:52", "remaining_time": "10:43:22"} +{"current_steps": 1070, "total_steps": 4048, "loss": 0.3472951054573059, "lr": 1.7599414634907828e-05, "epoch": 0.5288520944025701, "percentage": 26.43, "elapsed_time": "3:51:06", "remaining_time": "10:43:12"} +{"current_steps": 1071, "total_steps": 4048, "loss": 0.393882155418396, "lr": 1.7594101283330184e-05, "epoch": 0.5293463486964043, "percentage": 26.46, "elapsed_time": "3:51:19", "remaining_time": "10:42:59"} +{"current_steps": 1072, "total_steps": 4048, "loss": 0.3094913065433502, "lr": 1.758878286203903e-05, "epoch": 0.5298406029902385, "percentage": 26.48, "elapsed_time": "3:51:33", "remaining_time": "10:42:48"} +{"current_steps": 1073, "total_steps": 4048, "loss": 0.33904048800468445, "lr": 1.758345937458487e-05, "epoch": 0.5303348572840727, "percentage": 26.51, "elapsed_time": "3:51:46", "remaining_time": "10:42:36"} +{"current_steps": 1074, "total_steps": 4048, "loss": 0.3218901753425598, "lr": 1.7578130824521585e-05, 
"epoch": 0.5308291115779068, "percentage": 26.53, "elapsed_time": "3:51:59", "remaining_time": "10:42:24"} +{"current_steps": 1075, "total_steps": 4048, "loss": 0.31584852933883667, "lr": 1.7572797215406442e-05, "epoch": 0.531323365871741, "percentage": 26.56, "elapsed_time": "3:52:11", "remaining_time": "10:42:10"} +{"current_steps": 1076, "total_steps": 4048, "loss": 0.3449877202510834, "lr": 1.756745855080008e-05, "epoch": 0.5318176201655752, "percentage": 26.58, "elapsed_time": "3:52:25", "remaining_time": "10:41:57"} +{"current_steps": 1077, "total_steps": 4048, "loss": 0.3544886112213135, "lr": 1.756211483426651e-05, "epoch": 0.5323118744594094, "percentage": 26.61, "elapsed_time": "3:52:37", "remaining_time": "10:41:43"} +{"current_steps": 1078, "total_steps": 4048, "loss": 0.34360697865486145, "lr": 1.755676606937313e-05, "epoch": 0.5328061287532435, "percentage": 26.63, "elapsed_time": "3:52:50", "remaining_time": "10:41:31"} +{"current_steps": 1079, "total_steps": 4048, "loss": 0.3214710056781769, "lr": 1.7551412259690695e-05, "epoch": 0.5333003830470777, "percentage": 26.66, "elapsed_time": "3:53:03", "remaining_time": "10:41:17"} +{"current_steps": 1080, "total_steps": 4048, "loss": 0.33841896057128906, "lr": 1.754605340879333e-05, "epoch": 0.5337946373409119, "percentage": 26.68, "elapsed_time": "3:53:16", "remaining_time": "10:41:04"} +{"current_steps": 1081, "total_steps": 4048, "loss": 0.3134745657444, "lr": 1.7540689520258532e-05, "epoch": 0.534288891634746, "percentage": 26.7, "elapsed_time": "3:53:29", "remaining_time": "10:40:52"} +{"current_steps": 1082, "total_steps": 4048, "loss": 0.3469204306602478, "lr": 1.753532059766715e-05, "epoch": 0.5347831459285802, "percentage": 26.73, "elapsed_time": "3:53:42", "remaining_time": "10:40:38"} +{"current_steps": 1083, "total_steps": 4048, "loss": 0.39217621088027954, "lr": 1.752994664460341e-05, "epoch": 0.5352774002224144, "percentage": 26.75, "elapsed_time": "3:53:55", "remaining_time": "10:40:25"} 
+{"current_steps": 1084, "total_steps": 4048, "loss": 0.34482622146606445, "lr": 1.7524567664654873e-05, "epoch": 0.5357716545162486, "percentage": 26.78, "elapsed_time": "3:54:08", "remaining_time": "10:40:11"} +{"current_steps": 1085, "total_steps": 4048, "loss": 0.308369517326355, "lr": 1.751918366141248e-05, "epoch": 0.5362659088100827, "percentage": 26.8, "elapsed_time": "3:54:21", "remaining_time": "10:39:59"} +{"current_steps": 1086, "total_steps": 4048, "loss": 0.3396676480770111, "lr": 1.751379463847051e-05, "epoch": 0.5367601631039169, "percentage": 26.83, "elapsed_time": "3:54:33", "remaining_time": "10:39:45"} +{"current_steps": 1087, "total_steps": 4048, "loss": 0.3059370517730713, "lr": 1.7508400599426596e-05, "epoch": 0.5372544173977511, "percentage": 26.85, "elapsed_time": "3:54:46", "remaining_time": "10:39:33"} +{"current_steps": 1088, "total_steps": 4048, "loss": 0.31689077615737915, "lr": 1.7503001547881728e-05, "epoch": 0.5377486716915854, "percentage": 26.88, "elapsed_time": "3:54:59", "remaining_time": "10:39:19"} +{"current_steps": 1089, "total_steps": 4048, "loss": 0.37134337425231934, "lr": 1.749759748744023e-05, "epoch": 0.5382429259854195, "percentage": 26.9, "elapsed_time": "3:55:12", "remaining_time": "10:39:06"} +{"current_steps": 1090, "total_steps": 4048, "loss": 0.30404967069625854, "lr": 1.7492188421709775e-05, "epoch": 0.5387371802792537, "percentage": 26.93, "elapsed_time": "3:55:25", "remaining_time": "10:38:53"} +{"current_steps": 1091, "total_steps": 4048, "loss": 0.34773269295692444, "lr": 1.7486774354301382e-05, "epoch": 0.5392314345730879, "percentage": 26.95, "elapsed_time": "3:55:38", "remaining_time": "10:38:39"} +{"current_steps": 1092, "total_steps": 4048, "loss": 0.34448760747909546, "lr": 1.7481355288829404e-05, "epoch": 0.5397256888669221, "percentage": 26.98, "elapsed_time": "3:55:51", "remaining_time": "10:38:26"} +{"current_steps": 1093, "total_steps": 4048, "loss": 0.33557915687561035, "lr": 
1.7475931228911526e-05, "epoch": 0.5402199431607562, "percentage": 27.0, "elapsed_time": "3:56:03", "remaining_time": "10:38:13"} +{"current_steps": 1094, "total_steps": 4048, "loss": 0.3216322362422943, "lr": 1.7470502178168783e-05, "epoch": 0.5407141974545904, "percentage": 27.03, "elapsed_time": "3:56:16", "remaining_time": "10:38:00"} +{"current_steps": 1095, "total_steps": 4048, "loss": 0.3175346255302429, "lr": 1.7465068140225524e-05, "epoch": 0.5412084517484246, "percentage": 27.05, "elapsed_time": "3:56:29", "remaining_time": "10:37:45"} +{"current_steps": 1096, "total_steps": 4048, "loss": 0.3150678277015686, "lr": 1.7459629118709435e-05, "epoch": 0.5417027060422588, "percentage": 27.08, "elapsed_time": "3:56:42", "remaining_time": "10:37:32"} +{"current_steps": 1097, "total_steps": 4048, "loss": 0.3372325897216797, "lr": 1.7454185117251534e-05, "epoch": 0.5421969603360929, "percentage": 27.1, "elapsed_time": "3:56:54", "remaining_time": "10:37:17"} +{"current_steps": 1098, "total_steps": 4048, "loss": 0.3460095524787903, "lr": 1.7448736139486156e-05, "epoch": 0.5426912146299271, "percentage": 27.12, "elapsed_time": "3:57:07", "remaining_time": "10:37:04"} +{"current_steps": 1099, "total_steps": 4048, "loss": 0.3465900421142578, "lr": 1.7443282189050964e-05, "epoch": 0.5431854689237613, "percentage": 27.15, "elapsed_time": "3:57:19", "remaining_time": "10:36:50"} +{"current_steps": 1100, "total_steps": 4048, "loss": 0.3707941174507141, "lr": 1.7437823269586925e-05, "epoch": 0.5436797232175955, "percentage": 27.17, "elapsed_time": "3:57:33", "remaining_time": "10:36:39"} +{"current_steps": 1101, "total_steps": 4048, "loss": 0.3317713141441345, "lr": 1.7432359384738354e-05, "epoch": 0.5441739775114296, "percentage": 27.2, "elapsed_time": "3:57:52", "remaining_time": "10:36:43"} +{"current_steps": 1102, "total_steps": 4048, "loss": 0.3391956090927124, "lr": 1.742689053815285e-05, "epoch": 0.5446682318052638, "percentage": 27.22, "elapsed_time": "3:58:06", 
"remaining_time": "10:36:32"} +{"current_steps": 1103, "total_steps": 4048, "loss": 0.3838513195514679, "lr": 1.742141673348134e-05, "epoch": 0.545162486099098, "percentage": 27.25, "elapsed_time": "3:58:19", "remaining_time": "10:36:19"} +{"current_steps": 1104, "total_steps": 4048, "loss": 0.4438849687576294, "lr": 1.7415937974378057e-05, "epoch": 0.5456567403929322, "percentage": 27.27, "elapsed_time": "3:58:32", "remaining_time": "10:36:05"} +{"current_steps": 1105, "total_steps": 4048, "loss": 0.35329896211624146, "lr": 1.7410454264500542e-05, "epoch": 0.5461509946867663, "percentage": 27.3, "elapsed_time": "3:58:45", "remaining_time": "10:35:53"} +{"current_steps": 1106, "total_steps": 4048, "loss": 0.3124481439590454, "lr": 1.7404965607509646e-05, "epoch": 0.5466452489806005, "percentage": 27.32, "elapsed_time": "3:58:57", "remaining_time": "10:35:39"} +{"current_steps": 1107, "total_steps": 4048, "loss": 0.3595995008945465, "lr": 1.739947200706951e-05, "epoch": 0.5471395032744347, "percentage": 27.35, "elapsed_time": "3:59:10", "remaining_time": "10:35:25"} +{"current_steps": 1108, "total_steps": 4048, "loss": 0.35914891958236694, "lr": 1.7393973466847592e-05, "epoch": 0.5476337575682688, "percentage": 27.37, "elapsed_time": "3:59:23", "remaining_time": "10:35:11"} +{"current_steps": 1109, "total_steps": 4048, "loss": 0.34034737944602966, "lr": 1.7388469990514636e-05, "epoch": 0.548128011862103, "percentage": 27.4, "elapsed_time": "3:59:36", "remaining_time": "10:34:58"} +{"current_steps": 1110, "total_steps": 4048, "loss": 0.3033643066883087, "lr": 1.7382961581744677e-05, "epoch": 0.5486222661559372, "percentage": 27.42, "elapsed_time": "3:59:48", "remaining_time": "10:34:45"} +{"current_steps": 1111, "total_steps": 4048, "loss": 0.3239862322807312, "lr": 1.737744824421506e-05, "epoch": 0.5491165204497714, "percentage": 27.45, "elapsed_time": "4:00:02", "remaining_time": "10:34:33"} +{"current_steps": 1112, "total_steps": 4048, "loss": 0.36473411321640015, 
"lr": 1.7371929981606403e-05, "epoch": 0.5496107747436055, "percentage": 27.47, "elapsed_time": "4:00:14", "remaining_time": "10:34:18"} +{"current_steps": 1113, "total_steps": 4048, "loss": 0.3129761517047882, "lr": 1.7366406797602625e-05, "epoch": 0.5501050290374397, "percentage": 27.5, "elapsed_time": "4:00:27", "remaining_time": "10:34:04"} +{"current_steps": 1114, "total_steps": 4048, "loss": 0.30224812030792236, "lr": 1.736087869589092e-05, "epoch": 0.550599283331274, "percentage": 27.52, "elapsed_time": "4:00:39", "remaining_time": "10:33:50"} +{"current_steps": 1115, "total_steps": 4048, "loss": 0.30045247077941895, "lr": 1.7355345680161774e-05, "epoch": 0.5510935376251082, "percentage": 27.54, "elapsed_time": "4:00:51", "remaining_time": "10:33:35"} +{"current_steps": 1116, "total_steps": 4048, "loss": 0.3356926739215851, "lr": 1.7349807754108944e-05, "epoch": 0.5515877919189422, "percentage": 27.57, "elapsed_time": "4:01:03", "remaining_time": "10:33:20"} +{"current_steps": 1117, "total_steps": 4048, "loss": 0.37749868631362915, "lr": 1.7344264921429475e-05, "epoch": 0.5520820462127765, "percentage": 27.59, "elapsed_time": "4:01:16", "remaining_time": "10:33:05"} +{"current_steps": 1118, "total_steps": 4048, "loss": 0.331012099981308, "lr": 1.733871718582368e-05, "epoch": 0.5525763005066107, "percentage": 27.62, "elapsed_time": "4:01:28", "remaining_time": "10:32:50"} +{"current_steps": 1119, "total_steps": 4048, "loss": 0.3557187020778656, "lr": 1.7333164550995153e-05, "epoch": 0.5530705548004449, "percentage": 27.64, "elapsed_time": "4:01:40", "remaining_time": "10:32:36"} +{"current_steps": 1120, "total_steps": 4048, "loss": 0.34102991223335266, "lr": 1.7327607020650744e-05, "epoch": 0.553564809094279, "percentage": 27.67, "elapsed_time": "4:01:53", "remaining_time": "10:32:22"} +{"current_steps": 1121, "total_steps": 4048, "loss": 0.328019917011261, "lr": 1.7322044598500594e-05, "epoch": 0.5540590633881132, "percentage": 27.69, "elapsed_time": 
"4:02:06", "remaining_time": "10:32:08"} +{"current_steps": 1122, "total_steps": 4048, "loss": 0.33980751037597656, "lr": 1.7316477288258085e-05, "epoch": 0.5545533176819474, "percentage": 27.72, "elapsed_time": "4:02:19", "remaining_time": "10:31:55"} +{"current_steps": 1123, "total_steps": 4048, "loss": 0.3460109233856201, "lr": 1.731090509363988e-05, "epoch": 0.5550475719757816, "percentage": 27.74, "elapsed_time": "4:02:31", "remaining_time": "10:31:40"} +{"current_steps": 1124, "total_steps": 4048, "loss": 0.3013002276420593, "lr": 1.730532801836589e-05, "epoch": 0.5555418262696157, "percentage": 27.77, "elapsed_time": "4:02:44", "remaining_time": "10:31:27"} +{"current_steps": 1125, "total_steps": 4048, "loss": 0.36195772886276245, "lr": 1.72997460661593e-05, "epoch": 0.5560360805634499, "percentage": 27.79, "elapsed_time": "4:02:56", "remaining_time": "10:31:13"} +{"current_steps": 1126, "total_steps": 4048, "loss": 0.3368675112724304, "lr": 1.7294159240746532e-05, "epoch": 0.5565303348572841, "percentage": 27.82, "elapsed_time": "4:03:10", "remaining_time": "10:31:01"} +{"current_steps": 1127, "total_steps": 4048, "loss": 0.36618539690971375, "lr": 1.7288567545857283e-05, "epoch": 0.5570245891511183, "percentage": 27.84, "elapsed_time": "4:03:22", "remaining_time": "10:30:48"} +{"current_steps": 1128, "total_steps": 4048, "loss": 0.3230215311050415, "lr": 1.7282970985224477e-05, "epoch": 0.5575188434449524, "percentage": 27.87, "elapsed_time": "4:03:36", "remaining_time": "10:30:36"} +{"current_steps": 1129, "total_steps": 4048, "loss": 0.38779711723327637, "lr": 1.72773695625843e-05, "epoch": 0.5580130977387866, "percentage": 27.89, "elapsed_time": "4:03:49", "remaining_time": "10:30:23"} +{"current_steps": 1130, "total_steps": 4048, "loss": 0.33910998702049255, "lr": 1.7271763281676187e-05, "epoch": 0.5585073520326208, "percentage": 27.92, "elapsed_time": "4:04:02", "remaining_time": "10:30:10"} +{"current_steps": 1131, "total_steps": 4048, "loss": 
0.3526651859283447, "lr": 1.726615214624281e-05, "epoch": 0.559001606326455, "percentage": 27.94, "elapsed_time": "4:04:15", "remaining_time": "10:29:57"} +{"current_steps": 1132, "total_steps": 4048, "loss": 0.33794116973876953, "lr": 1.7260536160030077e-05, "epoch": 0.5594958606202891, "percentage": 27.96, "elapsed_time": "4:04:28", "remaining_time": "10:29:46"} +{"current_steps": 1133, "total_steps": 4048, "loss": 0.3294123411178589, "lr": 1.7254915326787145e-05, "epoch": 0.5599901149141233, "percentage": 27.99, "elapsed_time": "4:04:41", "remaining_time": "10:29:32"} +{"current_steps": 1134, "total_steps": 4048, "loss": 0.31193166971206665, "lr": 1.7249289650266402e-05, "epoch": 0.5604843692079575, "percentage": 28.01, "elapsed_time": "4:04:54", "remaining_time": "10:29:20"} +{"current_steps": 1135, "total_steps": 4048, "loss": 0.298290491104126, "lr": 1.7243659134223467e-05, "epoch": 0.5609786235017916, "percentage": 28.04, "elapsed_time": "4:05:07", "remaining_time": "10:29:07"} +{"current_steps": 1136, "total_steps": 4048, "loss": 0.3157176971435547, "lr": 1.7238023782417194e-05, "epoch": 0.5614728777956258, "percentage": 28.06, "elapsed_time": "4:05:20", "remaining_time": "10:28:54"} +{"current_steps": 1137, "total_steps": 4048, "loss": 0.3152535855770111, "lr": 1.7232383598609664e-05, "epoch": 0.56196713208946, "percentage": 28.09, "elapsed_time": "4:05:33", "remaining_time": "10:28:40"} +{"current_steps": 1138, "total_steps": 4048, "loss": 0.35004952549934387, "lr": 1.722673858656618e-05, "epoch": 0.5624613863832942, "percentage": 28.11, "elapsed_time": "4:05:45", "remaining_time": "10:28:25"} +{"current_steps": 1139, "total_steps": 4048, "loss": 0.3531174957752228, "lr": 1.722108875005527e-05, "epoch": 0.5629556406771283, "percentage": 28.14, "elapsed_time": "4:05:58", "remaining_time": "10:28:13"} +{"current_steps": 1140, "total_steps": 4048, "loss": 0.32532358169555664, "lr": 1.7215434092848693e-05, "epoch": 0.5634498949709625, "percentage": 28.16, 
"elapsed_time": "4:06:10", "remaining_time": "10:27:58"} +{"current_steps": 1141, "total_steps": 4048, "loss": 0.3252495229244232, "lr": 1.7209774618721408e-05, "epoch": 0.5639441492647967, "percentage": 28.19, "elapsed_time": "4:06:23", "remaining_time": "10:27:45"} +{"current_steps": 1142, "total_steps": 4048, "loss": 0.35428208112716675, "lr": 1.7204110331451603e-05, "epoch": 0.564438403558631, "percentage": 28.21, "elapsed_time": "4:06:36", "remaining_time": "10:27:31"} +{"current_steps": 1143, "total_steps": 4048, "loss": 0.37419646978378296, "lr": 1.7198441234820674e-05, "epoch": 0.564932657852465, "percentage": 28.24, "elapsed_time": "4:06:49", "remaining_time": "10:27:18"} +{"current_steps": 1144, "total_steps": 4048, "loss": 0.3342249095439911, "lr": 1.7192767332613235e-05, "epoch": 0.5654269121462993, "percentage": 28.26, "elapsed_time": "4:07:01", "remaining_time": "10:27:04"} +{"current_steps": 1145, "total_steps": 4048, "loss": 0.36827898025512695, "lr": 1.7187088628617093e-05, "epoch": 0.5659211664401335, "percentage": 28.29, "elapsed_time": "4:07:14", "remaining_time": "10:26:51"} +{"current_steps": 1146, "total_steps": 4048, "loss": 0.3560858964920044, "lr": 1.7181405126623275e-05, "epoch": 0.5664154207339677, "percentage": 28.31, "elapsed_time": "4:07:27", "remaining_time": "10:26:37"} +{"current_steps": 1147, "total_steps": 4048, "loss": 0.35333797335624695, "lr": 1.7175716830426005e-05, "epoch": 0.5669096750278018, "percentage": 28.33, "elapsed_time": "4:07:40", "remaining_time": "10:26:24"} +{"current_steps": 1148, "total_steps": 4048, "loss": 0.31053799390792847, "lr": 1.71700237438227e-05, "epoch": 0.567403929321636, "percentage": 28.36, "elapsed_time": "4:07:52", "remaining_time": "10:26:10"} +{"current_steps": 1149, "total_steps": 4048, "loss": 0.37123826146125793, "lr": 1.7164325870613998e-05, "epoch": 0.5678981836154702, "percentage": 28.38, "elapsed_time": "4:08:05", "remaining_time": "10:25:57"} +{"current_steps": 1150, "total_steps": 
4048, "loss": 0.33981990814208984, "lr": 1.715862321460371e-05, "epoch": 0.5683924379093044, "percentage": 28.41, "elapsed_time": "4:08:17", "remaining_time": "10:25:42"} +{"current_steps": 1151, "total_steps": 4048, "loss": 0.34938257932662964, "lr": 1.7152915779598846e-05, "epoch": 0.5688866922031385, "percentage": 28.43, "elapsed_time": "4:08:30", "remaining_time": "10:25:29"} +{"current_steps": 1152, "total_steps": 4048, "loss": 0.3069387376308441, "lr": 1.714720356940961e-05, "epoch": 0.5693809464969727, "percentage": 28.46, "elapsed_time": "4:08:42", "remaining_time": "10:25:13"} +{"current_steps": 1153, "total_steps": 4048, "loss": 0.34879156947135925, "lr": 1.7141486587849397e-05, "epoch": 0.5698752007908069, "percentage": 28.48, "elapsed_time": "4:08:55", "remaining_time": "10:25:01"} +{"current_steps": 1154, "total_steps": 4048, "loss": 0.3624545931816101, "lr": 1.7135764838734773e-05, "epoch": 0.5703694550846411, "percentage": 28.51, "elapsed_time": "4:09:08", "remaining_time": "10:24:46"} +{"current_steps": 1155, "total_steps": 4048, "loss": 0.3548320531845093, "lr": 1.7130038325885502e-05, "epoch": 0.5708637093784752, "percentage": 28.53, "elapsed_time": "4:09:20", "remaining_time": "10:24:33"} +{"current_steps": 1156, "total_steps": 4048, "loss": 0.3004404902458191, "lr": 1.7124307053124518e-05, "epoch": 0.5713579636723094, "percentage": 28.56, "elapsed_time": "4:09:33", "remaining_time": "10:24:19"} +{"current_steps": 1157, "total_steps": 4048, "loss": 0.31545472145080566, "lr": 1.7118571024277943e-05, "epoch": 0.5718522179661436, "percentage": 28.58, "elapsed_time": "4:09:45", "remaining_time": "10:24:05"} +{"current_steps": 1158, "total_steps": 4048, "loss": 0.3116477429866791, "lr": 1.711283024317506e-05, "epoch": 0.5723464722599778, "percentage": 28.61, "elapsed_time": "4:09:59", "remaining_time": "10:23:52"} +{"current_steps": 1159, "total_steps": 4048, "loss": 0.3472268581390381, "lr": 1.710708471364834e-05, "epoch": 0.5728407265538119, 
"percentage": 28.63, "elapsed_time": "4:10:11", "remaining_time": "10:23:38"} +{"current_steps": 1160, "total_steps": 4048, "loss": 0.33334046602249146, "lr": 1.7101334439533414e-05, "epoch": 0.5733349808476461, "percentage": 28.66, "elapsed_time": "4:10:24", "remaining_time": "10:23:25"} +{"current_steps": 1161, "total_steps": 4048, "loss": 0.3462664783000946, "lr": 1.7095579424669074e-05, "epoch": 0.5738292351414803, "percentage": 28.68, "elapsed_time": "4:10:37", "remaining_time": "10:23:11"} +{"current_steps": 1162, "total_steps": 4048, "loss": 0.3241977393627167, "lr": 1.7089819672897304e-05, "epoch": 0.5743234894353144, "percentage": 28.71, "elapsed_time": "4:10:50", "remaining_time": "10:22:59"} +{"current_steps": 1163, "total_steps": 4048, "loss": 0.3194134533405304, "lr": 1.7084055188063217e-05, "epoch": 0.5748177437291486, "percentage": 28.73, "elapsed_time": "4:11:03", "remaining_time": "10:22:46"} +{"current_steps": 1164, "total_steps": 4048, "loss": 0.3644179701805115, "lr": 1.7078285974015103e-05, "epoch": 0.5753119980229828, "percentage": 28.75, "elapsed_time": "4:11:16", "remaining_time": "10:22:34"} +{"current_steps": 1165, "total_steps": 4048, "loss": 0.36653730273246765, "lr": 1.7072512034604412e-05, "epoch": 0.575806252316817, "percentage": 28.78, "elapsed_time": "4:11:29", "remaining_time": "10:22:21"} +{"current_steps": 1166, "total_steps": 4048, "loss": 0.3435714840888977, "lr": 1.706673337368574e-05, "epoch": 0.5763005066106511, "percentage": 28.8, "elapsed_time": "4:11:42", "remaining_time": "10:22:09"} +{"current_steps": 1167, "total_steps": 4048, "loss": 0.36935871839523315, "lr": 1.706094999511684e-05, "epoch": 0.5767947609044853, "percentage": 28.83, "elapsed_time": "4:11:55", "remaining_time": "10:21:56"} +{"current_steps": 1168, "total_steps": 4048, "loss": 0.29493796825408936, "lr": 1.7055161902758607e-05, "epoch": 0.5772890151983195, "percentage": 28.85, "elapsed_time": "4:12:08", "remaining_time": "10:21:43"} +{"current_steps": 
1169, "total_steps": 4048, "loss": 0.32378828525543213, "lr": 1.70493691004751e-05, "epoch": 0.5777832694921538, "percentage": 28.88, "elapsed_time": "4:12:22", "remaining_time": "10:21:31"} +{"current_steps": 1170, "total_steps": 4048, "loss": 0.3587600588798523, "lr": 1.70435715921335e-05, "epoch": 0.5782775237859878, "percentage": 28.9, "elapsed_time": "4:12:35", "remaining_time": "10:21:18"} +{"current_steps": 1171, "total_steps": 4048, "loss": 0.31885826587677, "lr": 1.703776938160415e-05, "epoch": 0.578771778079822, "percentage": 28.93, "elapsed_time": "4:12:48", "remaining_time": "10:21:07"} +{"current_steps": 1172, "total_steps": 4048, "loss": 0.2950041890144348, "lr": 1.7031962472760514e-05, "epoch": 0.5792660323736563, "percentage": 28.95, "elapsed_time": "4:13:01", "remaining_time": "10:20:54"} +{"current_steps": 1173, "total_steps": 4048, "loss": 0.36190298199653625, "lr": 1.7026150869479208e-05, "epoch": 0.5797602866674905, "percentage": 28.98, "elapsed_time": "4:13:14", "remaining_time": "10:20:42"} +{"current_steps": 1174, "total_steps": 4048, "loss": 0.3402514159679413, "lr": 1.7020334575639972e-05, "epoch": 0.5802545409613246, "percentage": 29.0, "elapsed_time": "4:13:27", "remaining_time": "10:20:29"} +{"current_steps": 1175, "total_steps": 4048, "loss": 0.3131282925605774, "lr": 1.7014513595125684e-05, "epoch": 0.5807487952551588, "percentage": 29.03, "elapsed_time": "4:13:41", "remaining_time": "10:20:17"} +{"current_steps": 1176, "total_steps": 4048, "loss": 0.29499226808547974, "lr": 1.7008687931822344e-05, "epoch": 0.581243049548993, "percentage": 29.05, "elapsed_time": "4:13:53", "remaining_time": "10:20:03"} +{"current_steps": 1177, "total_steps": 4048, "loss": 0.36821871995925903, "lr": 1.700285758961908e-05, "epoch": 0.5817373038428272, "percentage": 29.08, "elapsed_time": "4:14:06", "remaining_time": "10:19:50"} +{"current_steps": 1178, "total_steps": 4048, "loss": 0.31486836075782776, "lr": 1.6997022572408152e-05, "epoch": 
0.5822315581366613, "percentage": 29.1, "elapsed_time": "4:14:19", "remaining_time": "10:19:37"} +{"current_steps": 1179, "total_steps": 4048, "loss": 0.3176078498363495, "lr": 1.6991182884084928e-05, "epoch": 0.5827258124304955, "percentage": 29.13, "elapsed_time": "4:14:32", "remaining_time": "10:19:24"} +{"current_steps": 1180, "total_steps": 4048, "loss": 0.4130980968475342, "lr": 1.69853385285479e-05, "epoch": 0.5832200667243297, "percentage": 29.15, "elapsed_time": "4:14:44", "remaining_time": "10:19:09"} +{"current_steps": 1181, "total_steps": 4048, "loss": 0.3164641857147217, "lr": 1.697948950969868e-05, "epoch": 0.5837143210181639, "percentage": 29.17, "elapsed_time": "4:14:57", "remaining_time": "10:18:57"} +{"current_steps": 1182, "total_steps": 4048, "loss": 0.36420726776123047, "lr": 1.697363583144199e-05, "epoch": 0.584208575311998, "percentage": 29.2, "elapsed_time": "4:15:10", "remaining_time": "10:18:43"} +{"current_steps": 1183, "total_steps": 4048, "loss": 0.3279833197593689, "lr": 1.696777749768566e-05, "epoch": 0.5847028296058322, "percentage": 29.22, "elapsed_time": "4:15:23", "remaining_time": "10:18:30"} +{"current_steps": 1184, "total_steps": 4048, "loss": 0.311473548412323, "lr": 1.696191451234063e-05, "epoch": 0.5851970838996664, "percentage": 29.25, "elapsed_time": "4:15:36", "remaining_time": "10:18:16"} +{"current_steps": 1185, "total_steps": 4048, "loss": 0.32284629344940186, "lr": 1.6956046879320943e-05, "epoch": 0.5856913381935006, "percentage": 29.27, "elapsed_time": "4:15:49", "remaining_time": "10:18:04"} +{"current_steps": 1186, "total_steps": 4048, "loss": 0.3318635821342468, "lr": 1.6950174602543753e-05, "epoch": 0.5861855924873347, "percentage": 29.3, "elapsed_time": "4:16:01", "remaining_time": "10:17:50"} +{"current_steps": 1187, "total_steps": 4048, "loss": 0.3268307149410248, "lr": 1.6944297685929298e-05, "epoch": 0.5866798467811689, "percentage": 29.32, "elapsed_time": "4:16:14", "remaining_time": "10:17:36"} 
+{"current_steps": 1188, "total_steps": 4048, "loss": 0.31885889172554016, "lr": 1.6938416133400934e-05, "epoch": 0.5871741010750031, "percentage": 29.35, "elapsed_time": "4:16:27", "remaining_time": "10:17:24"} +{"current_steps": 1189, "total_steps": 4048, "loss": 0.29273971915245056, "lr": 1.69325299488851e-05, "epoch": 0.5876683553688372, "percentage": 29.37, "elapsed_time": "4:16:40", "remaining_time": "10:17:10"} +{"current_steps": 1190, "total_steps": 4048, "loss": 0.3585188388824463, "lr": 1.692663913631132e-05, "epoch": 0.5881626096626714, "percentage": 29.4, "elapsed_time": "4:16:53", "remaining_time": "10:16:59"} +{"current_steps": 1191, "total_steps": 4048, "loss": 0.37145692110061646, "lr": 1.6920743699612226e-05, "epoch": 0.5886568639565056, "percentage": 29.42, "elapsed_time": "4:17:06", "remaining_time": "10:16:45"} +{"current_steps": 1192, "total_steps": 4048, "loss": 0.34805262088775635, "lr": 1.691484364272352e-05, "epoch": 0.5891511182503398, "percentage": 29.45, "elapsed_time": "4:17:20", "remaining_time": "10:16:34"} +{"current_steps": 1193, "total_steps": 4048, "loss": 0.3540152907371521, "lr": 1.6908938969584002e-05, "epoch": 0.5896453725441739, "percentage": 29.47, "elapsed_time": "4:17:33", "remaining_time": "10:16:21"} +{"current_steps": 1194, "total_steps": 4048, "loss": 0.35808512568473816, "lr": 1.6903029684135545e-05, "epoch": 0.5901396268380081, "percentage": 29.5, "elapsed_time": "4:17:46", "remaining_time": "10:16:09"} +{"current_steps": 1195, "total_steps": 4048, "loss": 0.2881169021129608, "lr": 1.68971157903231e-05, "epoch": 0.5906338811318423, "percentage": 29.52, "elapsed_time": "4:17:59", "remaining_time": "10:15:55"} +{"current_steps": 1196, "total_steps": 4048, "loss": 0.33551955223083496, "lr": 1.6891197292094704e-05, "epoch": 0.5911281354256765, "percentage": 29.55, "elapsed_time": "4:18:12", "remaining_time": "10:15:43"} +{"current_steps": 1197, "total_steps": 4048, "loss": 0.30721622705459595, "lr": 
1.688527419340146e-05, "epoch": 0.5916223897195106, "percentage": 29.57, "elapsed_time": "4:18:25", "remaining_time": "10:15:30"} +{"current_steps": 1198, "total_steps": 4048, "loss": 0.3296341300010681, "lr": 1.687934649819754e-05, "epoch": 0.5921166440133449, "percentage": 29.59, "elapsed_time": "4:18:38", "remaining_time": "10:15:17"} +{"current_steps": 1199, "total_steps": 4048, "loss": 0.3511606454849243, "lr": 1.6873414210440194e-05, "epoch": 0.5926108983071791, "percentage": 29.62, "elapsed_time": "4:18:51", "remaining_time": "10:15:05"} +{"current_steps": 1200, "total_steps": 4048, "loss": 0.34293919801712036, "lr": 1.6867477334089728e-05, "epoch": 0.5931051526010133, "percentage": 29.64, "elapsed_time": "4:19:04", "remaining_time": "10:14:51"} +{"current_steps": 1201, "total_steps": 4048, "loss": 0.3334580659866333, "lr": 1.686153587310952e-05, "epoch": 0.5935994068948474, "percentage": 29.67, "elapsed_time": "4:19:24", "remaining_time": "10:14:55"} +{"current_steps": 1202, "total_steps": 4048, "loss": 0.3542851209640503, "lr": 1.6855589831466e-05, "epoch": 0.5940936611886816, "percentage": 29.69, "elapsed_time": "4:19:36", "remaining_time": "10:14:41"} +{"current_steps": 1203, "total_steps": 4048, "loss": 0.30951520800590515, "lr": 1.6849639213128667e-05, "epoch": 0.5945879154825158, "percentage": 29.72, "elapsed_time": "4:19:50", "remaining_time": "10:14:29"} +{"current_steps": 1204, "total_steps": 4048, "loss": 0.333478718996048, "lr": 1.6843684022070062e-05, "epoch": 0.59508216977635, "percentage": 29.74, "elapsed_time": "4:20:03", "remaining_time": "10:14:16"} +{"current_steps": 1205, "total_steps": 4048, "loss": 0.33562588691711426, "lr": 1.683772426226579e-05, "epoch": 0.5955764240701841, "percentage": 29.77, "elapsed_time": "4:20:16", "remaining_time": "10:14:04"} +{"current_steps": 1206, "total_steps": 4048, "loss": 0.2626678943634033, "lr": 1.6831759937694497e-05, "epoch": 0.5960706783640183, "percentage": 29.79, "elapsed_time": "4:20:29", 
"remaining_time": "10:13:50"} +{"current_steps": 1207, "total_steps": 4048, "loss": 0.349543035030365, "lr": 1.6825791052337884e-05, "epoch": 0.5965649326578525, "percentage": 29.82, "elapsed_time": "4:20:42", "remaining_time": "10:13:38"} +{"current_steps": 1208, "total_steps": 4048, "loss": 0.3229057788848877, "lr": 1.6819817610180696e-05, "epoch": 0.5970591869516867, "percentage": 29.84, "elapsed_time": "4:20:55", "remaining_time": "10:13:26"} +{"current_steps": 1209, "total_steps": 4048, "loss": 0.32023823261260986, "lr": 1.681383961521071e-05, "epoch": 0.5975534412455208, "percentage": 29.87, "elapsed_time": "4:21:08", "remaining_time": "10:13:13"} +{"current_steps": 1210, "total_steps": 4048, "loss": 0.31556791067123413, "lr": 1.680785707141876e-05, "epoch": 0.598047695539355, "percentage": 29.89, "elapsed_time": "4:21:21", "remaining_time": "10:13:00"} +{"current_steps": 1211, "total_steps": 4048, "loss": 0.33287158608436584, "lr": 1.68018699827987e-05, "epoch": 0.5985419498331892, "percentage": 29.92, "elapsed_time": "4:21:35", "remaining_time": "10:12:48"} +{"current_steps": 1212, "total_steps": 4048, "loss": 0.28690433502197266, "lr": 1.6795878353347427e-05, "epoch": 0.5990362041270234, "percentage": 29.94, "elapsed_time": "4:21:47", "remaining_time": "10:12:35"} +{"current_steps": 1213, "total_steps": 4048, "loss": 0.3501484990119934, "lr": 1.6789882187064862e-05, "epoch": 0.5995304584208575, "percentage": 29.97, "elapsed_time": "4:22:00", "remaining_time": "10:12:21"} +{"current_steps": 1214, "total_steps": 4048, "loss": 0.3645259439945221, "lr": 1.678388148795397e-05, "epoch": 0.6000247127146917, "percentage": 29.99, "elapsed_time": "4:22:13", "remaining_time": "10:12:08"} +{"current_steps": 1215, "total_steps": 4048, "loss": 0.3270183801651001, "lr": 1.6777876260020726e-05, "epoch": 0.6005189670085259, "percentage": 30.01, "elapsed_time": "4:22:26", "remaining_time": "10:11:55"} +{"current_steps": 1216, "total_steps": 4048, "loss": 
0.31767967343330383, "lr": 1.6771866507274132e-05, "epoch": 0.60101322130236, "percentage": 30.04, "elapsed_time": "4:22:39", "remaining_time": "10:11:43"} +{"current_steps": 1217, "total_steps": 4048, "loss": 0.3170120120048523, "lr": 1.6765852233726216e-05, "epoch": 0.6015074755961942, "percentage": 30.06, "elapsed_time": "4:22:53", "remaining_time": "10:11:31"} +{"current_steps": 1218, "total_steps": 4048, "loss": 0.3270176351070404, "lr": 1.6759833443392022e-05, "epoch": 0.6020017298900284, "percentage": 30.09, "elapsed_time": "4:23:06", "remaining_time": "10:11:18"} +{"current_steps": 1219, "total_steps": 4048, "loss": 0.3229079246520996, "lr": 1.6753810140289608e-05, "epoch": 0.6024959841838626, "percentage": 30.11, "elapsed_time": "4:23:18", "remaining_time": "10:11:05"} +{"current_steps": 1220, "total_steps": 4048, "loss": 0.3366449773311615, "lr": 1.6747782328440044e-05, "epoch": 0.6029902384776967, "percentage": 30.14, "elapsed_time": "4:23:32", "remaining_time": "10:10:53"} +{"current_steps": 1221, "total_steps": 4048, "loss": 0.4027010500431061, "lr": 1.674175001186741e-05, "epoch": 0.6034844927715309, "percentage": 30.16, "elapsed_time": "4:23:44", "remaining_time": "10:10:39"} +{"current_steps": 1222, "total_steps": 4048, "loss": 0.31566083431243896, "lr": 1.6735713194598798e-05, "epoch": 0.6039787470653651, "percentage": 30.19, "elapsed_time": "4:23:57", "remaining_time": "10:10:26"} +{"current_steps": 1223, "total_steps": 4048, "loss": 0.3361780047416687, "lr": 1.67296718806643e-05, "epoch": 0.6044730013591993, "percentage": 30.21, "elapsed_time": "4:24:10", "remaining_time": "10:10:13"} +{"current_steps": 1224, "total_steps": 4048, "loss": 0.3197939693927765, "lr": 1.6723626074097007e-05, "epoch": 0.6049672556530334, "percentage": 30.24, "elapsed_time": "4:24:23", "remaining_time": "10:10:01"} +{"current_steps": 1225, "total_steps": 4048, "loss": 0.32977360486984253, "lr": 1.671757577893302e-05, "epoch": 0.6054615099468676, "percentage": 30.26, 
"elapsed_time": "4:24:36", "remaining_time": "10:09:47"} +{"current_steps": 1226, "total_steps": 4048, "loss": 0.3434401750564575, "lr": 1.671152099921142e-05, "epoch": 0.6059557642407019, "percentage": 30.29, "elapsed_time": "4:24:49", "remaining_time": "10:09:34"} +{"current_steps": 1227, "total_steps": 4048, "loss": 0.33856305480003357, "lr": 1.67054617389743e-05, "epoch": 0.6064500185345361, "percentage": 30.31, "elapsed_time": "4:25:02", "remaining_time": "10:09:22"} +{"current_steps": 1228, "total_steps": 4048, "loss": 0.31594911217689514, "lr": 1.669939800226673e-05, "epoch": 0.6069442728283702, "percentage": 30.34, "elapsed_time": "4:25:15", "remaining_time": "10:09:08"} +{"current_steps": 1229, "total_steps": 4048, "loss": 0.32347679138183594, "lr": 1.669332979313678e-05, "epoch": 0.6074385271222044, "percentage": 30.36, "elapsed_time": "4:25:28", "remaining_time": "10:08:56"} +{"current_steps": 1230, "total_steps": 4048, "loss": 0.32733607292175293, "lr": 1.6687257115635492e-05, "epoch": 0.6079327814160386, "percentage": 30.39, "elapsed_time": "4:25:41", "remaining_time": "10:08:42"} +{"current_steps": 1231, "total_steps": 4048, "loss": 0.306827187538147, "lr": 1.6681179973816908e-05, "epoch": 0.6084270357098728, "percentage": 30.41, "elapsed_time": "4:25:55", "remaining_time": "10:08:31"} +{"current_steps": 1232, "total_steps": 4048, "loss": 0.3515884280204773, "lr": 1.667509837173803e-05, "epoch": 0.6089212900037069, "percentage": 30.43, "elapsed_time": "4:26:08", "remaining_time": "10:08:18"} +{"current_steps": 1233, "total_steps": 4048, "loss": 0.28699082136154175, "lr": 1.6669012313458862e-05, "epoch": 0.6094155442975411, "percentage": 30.46, "elapsed_time": "4:26:22", "remaining_time": "10:08:07"} +{"current_steps": 1234, "total_steps": 4048, "loss": 0.30737537145614624, "lr": 1.6662921803042356e-05, "epoch": 0.6099097985913753, "percentage": 30.48, "elapsed_time": "4:26:35", "remaining_time": "10:07:55"} +{"current_steps": 1235, "total_steps": 
4048, "loss": 0.3193345069885254, "lr": 1.665682684455446e-05, "epoch": 0.6104040528852095, "percentage": 30.51, "elapsed_time": "4:26:49", "remaining_time": "10:07:44"} +{"current_steps": 1236, "total_steps": 4048, "loss": 0.3326336741447449, "lr": 1.6650727442064073e-05, "epoch": 0.6108983071790436, "percentage": 30.53, "elapsed_time": "4:27:02", "remaining_time": "10:07:31"} +{"current_steps": 1237, "total_steps": 4048, "loss": 0.2967267632484436, "lr": 1.6644623599643076e-05, "epoch": 0.6113925614728778, "percentage": 30.56, "elapsed_time": "4:27:15", "remaining_time": "10:07:20"} +{"current_steps": 1238, "total_steps": 4048, "loss": 0.3163914084434509, "lr": 1.66385153213663e-05, "epoch": 0.611886815766712, "percentage": 30.58, "elapsed_time": "4:27:28", "remaining_time": "10:07:07"} +{"current_steps": 1239, "total_steps": 4048, "loss": 0.40281808376312256, "lr": 1.663240261131155e-05, "epoch": 0.6123810700605461, "percentage": 30.61, "elapsed_time": "4:27:42", "remaining_time": "10:06:55"} +{"current_steps": 1240, "total_steps": 4048, "loss": 0.33946287631988525, "lr": 1.6626285473559586e-05, "epoch": 0.6128753243543803, "percentage": 30.63, "elapsed_time": "4:27:55", "remaining_time": "10:06:42"} +{"current_steps": 1241, "total_steps": 4048, "loss": 0.3750913143157959, "lr": 1.6620163912194114e-05, "epoch": 0.6133695786482145, "percentage": 30.66, "elapsed_time": "4:28:08", "remaining_time": "10:06:31"} +{"current_steps": 1242, "total_steps": 4048, "loss": 0.32449400424957275, "lr": 1.6614037931301804e-05, "epoch": 0.6138638329420487, "percentage": 30.68, "elapsed_time": "4:28:21", "remaining_time": "10:06:17"} +{"current_steps": 1243, "total_steps": 4048, "loss": 0.3484799861907959, "lr": 1.6607907534972277e-05, "epoch": 0.6143580872358828, "percentage": 30.71, "elapsed_time": "4:28:35", "remaining_time": "10:06:05"} +{"current_steps": 1244, "total_steps": 4048, "loss": 0.2991127669811249, "lr": 1.6601772727298095e-05, "epoch": 0.614852341529717, 
"percentage": 30.73, "elapsed_time": "4:28:48", "remaining_time": "10:05:53"} +{"current_steps": 1245, "total_steps": 4048, "loss": 0.339094340801239, "lr": 1.6595633512374768e-05, "epoch": 0.6153465958235512, "percentage": 30.76, "elapsed_time": "4:29:01", "remaining_time": "10:05:40"} +{"current_steps": 1246, "total_steps": 4048, "loss": 0.3147842288017273, "lr": 1.6589489894300744e-05, "epoch": 0.6158408501173854, "percentage": 30.78, "elapsed_time": "4:29:14", "remaining_time": "10:05:28"} +{"current_steps": 1247, "total_steps": 4048, "loss": 0.3036183714866638, "lr": 1.6583341877177427e-05, "epoch": 0.6163351044112195, "percentage": 30.81, "elapsed_time": "4:29:27", "remaining_time": "10:05:15"} +{"current_steps": 1248, "total_steps": 4048, "loss": 0.32657095789909363, "lr": 1.657718946510913e-05, "epoch": 0.6168293587050537, "percentage": 30.83, "elapsed_time": "4:29:41", "remaining_time": "10:05:03"} +{"current_steps": 1249, "total_steps": 4048, "loss": 0.3104664385318756, "lr": 1.6571032662203126e-05, "epoch": 0.6173236129988879, "percentage": 30.85, "elapsed_time": "4:29:54", "remaining_time": "10:04:50"} +{"current_steps": 1250, "total_steps": 4048, "loss": 0.30392807722091675, "lr": 1.6564871472569604e-05, "epoch": 0.6178178672927221, "percentage": 30.88, "elapsed_time": "4:30:07", "remaining_time": "10:04:39"} +{"current_steps": 1251, "total_steps": 4048, "loss": 0.3087356388568878, "lr": 1.655870590032169e-05, "epoch": 0.6183121215865562, "percentage": 30.9, "elapsed_time": "4:30:20", "remaining_time": "10:04:25"} +{"current_steps": 1252, "total_steps": 4048, "loss": 0.3220480978488922, "lr": 1.6552535949575427e-05, "epoch": 0.6188063758803904, "percentage": 30.93, "elapsed_time": "4:30:34", "remaining_time": "10:04:14"} +{"current_steps": 1253, "total_steps": 4048, "loss": 0.33925485610961914, "lr": 1.654636162444979e-05, "epoch": 0.6193006301742247, "percentage": 30.95, "elapsed_time": "4:30:46", "remaining_time": "10:04:01"} +{"current_steps": 1254, 
"total_steps": 4048, "loss": 0.3704617917537689, "lr": 1.6540182929066667e-05, "epoch": 0.6197948844680589, "percentage": 30.98, "elapsed_time": "4:31:00", "remaining_time": "10:03:49"} +{"current_steps": 1255, "total_steps": 4048, "loss": 0.33745670318603516, "lr": 1.653399986755087e-05, "epoch": 0.620289138761893, "percentage": 31.0, "elapsed_time": "4:31:13", "remaining_time": "10:03:36"} +{"current_steps": 1256, "total_steps": 4048, "loss": 0.31651467084884644, "lr": 1.6527812444030118e-05, "epoch": 0.6207833930557272, "percentage": 31.03, "elapsed_time": "4:31:27", "remaining_time": "10:03:24"} +{"current_steps": 1257, "total_steps": 4048, "loss": 0.360455185174942, "lr": 1.6521620662635053e-05, "epoch": 0.6212776473495614, "percentage": 31.05, "elapsed_time": "4:31:40", "remaining_time": "10:03:12"} +{"current_steps": 1258, "total_steps": 4048, "loss": 0.32819390296936035, "lr": 1.6515424527499214e-05, "epoch": 0.6217719016433956, "percentage": 31.08, "elapsed_time": "4:31:52", "remaining_time": "10:02:58"} +{"current_steps": 1259, "total_steps": 4048, "loss": 0.38759690523147583, "lr": 1.6509224042759053e-05, "epoch": 0.6222661559372297, "percentage": 31.1, "elapsed_time": "4:32:05", "remaining_time": "10:02:45"} +{"current_steps": 1260, "total_steps": 4048, "loss": 0.34250545501708984, "lr": 1.6503019212553932e-05, "epoch": 0.6227604102310639, "percentage": 31.13, "elapsed_time": "4:32:17", "remaining_time": "10:02:30"} +{"current_steps": 1261, "total_steps": 4048, "loss": 0.3120163679122925, "lr": 1.6496810041026097e-05, "epoch": 0.6232546645248981, "percentage": 31.15, "elapsed_time": "4:32:30", "remaining_time": "10:02:17"} +{"current_steps": 1262, "total_steps": 4048, "loss": 0.35985836386680603, "lr": 1.649059653232071e-05, "epoch": 0.6237489188187323, "percentage": 31.18, "elapsed_time": "4:32:43", "remaining_time": "10:02:03"} +{"current_steps": 1263, "total_steps": 4048, "loss": 0.3551288843154907, "lr": 1.648437869058581e-05, "epoch": 
0.6242431731125664, "percentage": 31.2, "elapsed_time": "4:32:55", "remaining_time": "10:01:50"} +{"current_steps": 1264, "total_steps": 4048, "loss": 0.33047816157341003, "lr": 1.6478156519972354e-05, "epoch": 0.6247374274064006, "percentage": 31.23, "elapsed_time": "4:33:08", "remaining_time": "10:01:35"} +{"current_steps": 1265, "total_steps": 4048, "loss": 0.32909417152404785, "lr": 1.6471930024634164e-05, "epoch": 0.6252316817002348, "percentage": 31.25, "elapsed_time": "4:33:20", "remaining_time": "10:01:21"} +{"current_steps": 1266, "total_steps": 4048, "loss": 0.3726924657821655, "lr": 1.6465699208727964e-05, "epoch": 0.6257259359940689, "percentage": 31.27, "elapsed_time": "4:33:33", "remaining_time": "10:01:07"} +{"current_steps": 1267, "total_steps": 4048, "loss": 0.3569204807281494, "lr": 1.6459464076413355e-05, "epoch": 0.6262201902879031, "percentage": 31.3, "elapsed_time": "4:33:45", "remaining_time": "10:00:53"} +{"current_steps": 1268, "total_steps": 4048, "loss": 0.33798107504844666, "lr": 1.6453224631852825e-05, "epoch": 0.6267144445817373, "percentage": 31.32, "elapsed_time": "4:33:58", "remaining_time": "10:00:39"} +{"current_steps": 1269, "total_steps": 4048, "loss": 0.32891637086868286, "lr": 1.644698087921173e-05, "epoch": 0.6272086988755715, "percentage": 31.35, "elapsed_time": "4:34:10", "remaining_time": "10:00:24"} +{"current_steps": 1270, "total_steps": 4048, "loss": 0.31512969732284546, "lr": 1.644073282265832e-05, "epoch": 0.6277029531694056, "percentage": 31.37, "elapsed_time": "4:34:23", "remaining_time": "10:00:11"} +{"current_steps": 1271, "total_steps": 4048, "loss": 0.350041925907135, "lr": 1.643448046636371e-05, "epoch": 0.6281972074632398, "percentage": 31.4, "elapsed_time": "4:34:35", "remaining_time": "9:59:57"} +{"current_steps": 1272, "total_steps": 4048, "loss": 0.3248854875564575, "lr": 1.642822381450187e-05, "epoch": 0.628691461757074, "percentage": 31.42, "elapsed_time": "4:34:49", "remaining_time": "9:59:45"} 
+{"current_steps": 1273, "total_steps": 4048, "loss": 0.3031661808490753, "lr": 1.6421962871249662e-05, "epoch": 0.6291857160509082, "percentage": 31.45, "elapsed_time": "4:35:02", "remaining_time": "9:59:33"} +{"current_steps": 1274, "total_steps": 4048, "loss": 0.2903754711151123, "lr": 1.6415697640786802e-05, "epoch": 0.6296799703447423, "percentage": 31.47, "elapsed_time": "4:35:15", "remaining_time": "9:59:21"} +{"current_steps": 1275, "total_steps": 4048, "loss": 0.300454318523407, "lr": 1.6409428127295864e-05, "epoch": 0.6301742246385765, "percentage": 31.5, "elapsed_time": "4:35:28", "remaining_time": "9:59:08"} +{"current_steps": 1276, "total_steps": 4048, "loss": 0.3430244028568268, "lr": 1.6403154334962286e-05, "epoch": 0.6306684789324107, "percentage": 31.52, "elapsed_time": "4:35:42", "remaining_time": "9:58:56"} +{"current_steps": 1277, "total_steps": 4048, "loss": 0.3728436827659607, "lr": 1.6396876267974367e-05, "epoch": 0.6311627332262449, "percentage": 31.55, "elapsed_time": "4:35:55", "remaining_time": "9:58:43"} +{"current_steps": 1278, "total_steps": 4048, "loss": 0.3021183907985687, "lr": 1.639059393052325e-05, "epoch": 0.631656987520079, "percentage": 31.57, "elapsed_time": "4:36:08", "remaining_time": "9:58:31"} +{"current_steps": 1279, "total_steps": 4048, "loss": 0.3313615918159485, "lr": 1.6384307326802934e-05, "epoch": 0.6321512418139132, "percentage": 31.6, "elapsed_time": "4:36:21", "remaining_time": "9:58:18"} +{"current_steps": 1280, "total_steps": 4048, "loss": 0.32833239436149597, "lr": 1.637801646101027e-05, "epoch": 0.6326454961077475, "percentage": 31.62, "elapsed_time": "4:36:33", "remaining_time": "9:58:04"} +{"current_steps": 1281, "total_steps": 4048, "loss": 0.3575769066810608, "lr": 1.6371721337344947e-05, "epoch": 0.6331397504015817, "percentage": 31.65, "elapsed_time": "4:36:47", "remaining_time": "9:57:52"} +{"current_steps": 1282, "total_steps": 4048, "loss": 0.33323729038238525, "lr": 1.6365421960009502e-05, "epoch": 
0.6336340046954158, "percentage": 31.67, "elapsed_time": "4:37:00", "remaining_time": "9:57:40"} +{"current_steps": 1283, "total_steps": 4048, "loss": 0.3522900938987732, "lr": 1.6359118333209307e-05, "epoch": 0.63412825898925, "percentage": 31.69, "elapsed_time": "4:37:13", "remaining_time": "9:57:27"} +{"current_steps": 1284, "total_steps": 4048, "loss": 0.3350796699523926, "lr": 1.635281046115257e-05, "epoch": 0.6346225132830842, "percentage": 31.72, "elapsed_time": "4:37:26", "remaining_time": "9:57:15"} +{"current_steps": 1285, "total_steps": 4048, "loss": 0.350632905960083, "lr": 1.6346498348050342e-05, "epoch": 0.6351167675769184, "percentage": 31.74, "elapsed_time": "4:37:40", "remaining_time": "9:57:04"} +{"current_steps": 1286, "total_steps": 4048, "loss": 0.2961253523826599, "lr": 1.6340181998116494e-05, "epoch": 0.6356110218707525, "percentage": 31.77, "elapsed_time": "4:37:54", "remaining_time": "9:56:51"} +{"current_steps": 1287, "total_steps": 4048, "loss": 0.35736170411109924, "lr": 1.6333861415567736e-05, "epoch": 0.6361052761645867, "percentage": 31.79, "elapsed_time": "4:38:07", "remaining_time": "9:56:40"} +{"current_steps": 1288, "total_steps": 4048, "loss": 0.35654571652412415, "lr": 1.63275366046236e-05, "epoch": 0.6365995304584209, "percentage": 31.82, "elapsed_time": "4:38:20", "remaining_time": "9:56:27"} +{"current_steps": 1289, "total_steps": 4048, "loss": 0.30518224835395813, "lr": 1.6321207569506435e-05, "epoch": 0.6370937847522551, "percentage": 31.84, "elapsed_time": "4:38:34", "remaining_time": "9:56:15"} +{"current_steps": 1290, "total_steps": 4048, "loss": 0.35099470615386963, "lr": 1.6314874314441413e-05, "epoch": 0.6375880390460892, "percentage": 31.87, "elapsed_time": "4:38:47", "remaining_time": "9:56:02"} +{"current_steps": 1291, "total_steps": 4048, "loss": 0.3577536344528198, "lr": 1.6308536843656528e-05, "epoch": 0.6380822933399234, "percentage": 31.89, "elapsed_time": "4:39:00", "remaining_time": "9:55:50"} 
+{"current_steps": 1292, "total_steps": 4048, "loss": 0.3141167163848877, "lr": 1.6302195161382586e-05, "epoch": 0.6385765476337576, "percentage": 31.92, "elapsed_time": "4:39:13", "remaining_time": "9:55:37"} +{"current_steps": 1293, "total_steps": 4048, "loss": 0.2920055389404297, "lr": 1.62958492718532e-05, "epoch": 0.6390708019275917, "percentage": 31.94, "elapsed_time": "4:39:27", "remaining_time": "9:55:25"} +{"current_steps": 1294, "total_steps": 4048, "loss": 0.32826486229896545, "lr": 1.6289499179304797e-05, "epoch": 0.6395650562214259, "percentage": 31.97, "elapsed_time": "4:39:39", "remaining_time": "9:55:12"} +{"current_steps": 1295, "total_steps": 4048, "loss": 0.3080480992794037, "lr": 1.628314488797661e-05, "epoch": 0.6400593105152601, "percentage": 31.99, "elapsed_time": "4:39:53", "remaining_time": "9:55:00"} +{"current_steps": 1296, "total_steps": 4048, "loss": 0.304529070854187, "lr": 1.627678640211067e-05, "epoch": 0.6405535648090943, "percentage": 32.02, "elapsed_time": "4:40:06", "remaining_time": "9:54:47"} +{"current_steps": 1297, "total_steps": 4048, "loss": 0.34653496742248535, "lr": 1.627042372595181e-05, "epoch": 0.6410478191029284, "percentage": 32.04, "elapsed_time": "4:40:19", "remaining_time": "9:54:34"} +{"current_steps": 1298, "total_steps": 4048, "loss": 0.2938673496246338, "lr": 1.6264056863747667e-05, "epoch": 0.6415420733967626, "percentage": 32.07, "elapsed_time": "4:40:32", "remaining_time": "9:54:22"} +{"current_steps": 1299, "total_steps": 4048, "loss": 0.32350343465805054, "lr": 1.625768581974866e-05, "epoch": 0.6420363276905968, "percentage": 32.09, "elapsed_time": "4:40:45", "remaining_time": "9:54:09"} +{"current_steps": 1300, "total_steps": 4048, "loss": 0.3175384998321533, "lr": 1.6251310598208015e-05, "epoch": 0.642530581984431, "percentage": 32.11, "elapsed_time": "4:40:59", "remaining_time": "9:53:57"} +{"current_steps": 1301, "total_steps": 4048, "loss": 0.32667648792266846, "lr": 1.6244931203381734e-05, "epoch": 
0.6430248362782651, "percentage": 32.14, "elapsed_time": "4:41:17", "remaining_time": "9:53:56"} +{"current_steps": 1302, "total_steps": 4048, "loss": 0.30110976099967957, "lr": 1.623854763952861e-05, "epoch": 0.6435190905720993, "percentage": 32.16, "elapsed_time": "4:41:30", "remaining_time": "9:53:43"} +{"current_steps": 1303, "total_steps": 4048, "loss": 0.3508617579936981, "lr": 1.6232159910910224e-05, "epoch": 0.6440133448659335, "percentage": 32.19, "elapsed_time": "4:41:43", "remaining_time": "9:53:29"} +{"current_steps": 1304, "total_steps": 4048, "loss": 0.34416183829307556, "lr": 1.622576802179092e-05, "epoch": 0.6445075991597677, "percentage": 32.21, "elapsed_time": "4:41:56", "remaining_time": "9:53:18"} +{"current_steps": 1305, "total_steps": 4048, "loss": 0.3509306311607361, "lr": 1.6219371976437847e-05, "epoch": 0.6450018534536018, "percentage": 32.24, "elapsed_time": "4:42:09", "remaining_time": "9:53:04"} +{"current_steps": 1306, "total_steps": 4048, "loss": 0.36186683177948, "lr": 1.6212971779120904e-05, "epoch": 0.645496107747436, "percentage": 32.26, "elapsed_time": "4:42:23", "remaining_time": "9:52:53"} +{"current_steps": 1307, "total_steps": 4048, "loss": 0.3123924732208252, "lr": 1.6206567434112776e-05, "epoch": 0.6459903620412702, "percentage": 32.29, "elapsed_time": "4:42:36", "remaining_time": "9:52:39"} +{"current_steps": 1308, "total_steps": 4048, "loss": 0.3691411018371582, "lr": 1.6200158945688907e-05, "epoch": 0.6464846163351045, "percentage": 32.31, "elapsed_time": "4:42:49", "remaining_time": "9:52:27"} +{"current_steps": 1309, "total_steps": 4048, "loss": 0.3136986792087555, "lr": 1.6193746318127516e-05, "epoch": 0.6469788706289386, "percentage": 32.34, "elapsed_time": "4:43:02", "remaining_time": "9:52:15"} +{"current_steps": 1310, "total_steps": 4048, "loss": 0.30374211072921753, "lr": 1.6187329555709585e-05, "epoch": 0.6474731249227728, "percentage": 32.36, "elapsed_time": "4:43:15", "remaining_time": "9:52:02"} 
+{"current_steps": 1311, "total_steps": 4048, "loss": 0.3633323907852173, "lr": 1.618090866271884e-05, "epoch": 0.647967379216607, "percentage": 32.39, "elapsed_time": "4:43:28", "remaining_time": "9:51:49"} +{"current_steps": 1312, "total_steps": 4048, "loss": 0.31395208835601807, "lr": 1.6174483643441795e-05, "epoch": 0.6484616335104412, "percentage": 32.41, "elapsed_time": "4:43:41", "remaining_time": "9:51:36"} +{"current_steps": 1313, "total_steps": 4048, "loss": 0.29258471727371216, "lr": 1.6168054502167687e-05, "epoch": 0.6489558878042753, "percentage": 32.44, "elapsed_time": "4:43:55", "remaining_time": "9:51:24"} +{"current_steps": 1314, "total_steps": 4048, "loss": 0.3086007833480835, "lr": 1.6161621243188528e-05, "epoch": 0.6494501420981095, "percentage": 32.46, "elapsed_time": "4:44:08", "remaining_time": "9:51:12"} +{"current_steps": 1315, "total_steps": 4048, "loss": 0.3604614734649658, "lr": 1.6155183870799063e-05, "epoch": 0.6499443963919437, "percentage": 32.49, "elapsed_time": "4:44:22", "remaining_time": "9:51:00"} +{"current_steps": 1316, "total_steps": 4048, "loss": 0.3784678876399994, "lr": 1.614874238929679e-05, "epoch": 0.6504386506857779, "percentage": 32.51, "elapsed_time": "4:44:35", "remaining_time": "9:50:47"} +{"current_steps": 1317, "total_steps": 4048, "loss": 0.29009610414505005, "lr": 1.6142296802981957e-05, "epoch": 0.650932904979612, "percentage": 32.53, "elapsed_time": "4:44:48", "remaining_time": "9:50:36"} +{"current_steps": 1318, "total_steps": 4048, "loss": 0.3667104244232178, "lr": 1.6135847116157542e-05, "epoch": 0.6514271592734462, "percentage": 32.56, "elapsed_time": "4:45:01", "remaining_time": "9:50:22"} +{"current_steps": 1319, "total_steps": 4048, "loss": 0.3100985884666443, "lr": 1.6129393333129262e-05, "epoch": 0.6519214135672804, "percentage": 32.58, "elapsed_time": "4:45:14", "remaining_time": "9:50:09"} +{"current_steps": 1320, "total_steps": 4048, "loss": 0.34128522872924805, "lr": 1.612293545820557e-05, 
"epoch": 0.6524156678611145, "percentage": 32.61, "elapsed_time": "4:45:27", "remaining_time": "9:49:55"} +{"current_steps": 1321, "total_steps": 4048, "loss": 0.3017216920852661, "lr": 1.611647349569765e-05, "epoch": 0.6529099221549487, "percentage": 32.63, "elapsed_time": "4:45:39", "remaining_time": "9:49:42"} +{"current_steps": 1322, "total_steps": 4048, "loss": 0.35060590505599976, "lr": 1.611000744991942e-05, "epoch": 0.6534041764487829, "percentage": 32.66, "elapsed_time": "4:45:52", "remaining_time": "9:49:29"} +{"current_steps": 1323, "total_steps": 4048, "loss": 0.3766549825668335, "lr": 1.610353732518752e-05, "epoch": 0.6538984307426171, "percentage": 32.68, "elapsed_time": "4:46:05", "remaining_time": "9:49:15"} +{"current_steps": 1324, "total_steps": 4048, "loss": 0.2970678210258484, "lr": 1.609706312582131e-05, "epoch": 0.6543926850364512, "percentage": 32.71, "elapsed_time": "4:46:18", "remaining_time": "9:49:03"} +{"current_steps": 1325, "total_steps": 4048, "loss": 0.3345789909362793, "lr": 1.609058485614287e-05, "epoch": 0.6548869393302854, "percentage": 32.73, "elapsed_time": "4:46:31", "remaining_time": "9:48:50"} +{"current_steps": 1326, "total_steps": 4048, "loss": 0.34838157892227173, "lr": 1.608410252047701e-05, "epoch": 0.6553811936241196, "percentage": 32.76, "elapsed_time": "4:46:45", "remaining_time": "9:48:38"} +{"current_steps": 1327, "total_steps": 4048, "loss": 0.27454087138175964, "lr": 1.6077616123151232e-05, "epoch": 0.6558754479179538, "percentage": 32.78, "elapsed_time": "4:46:58", "remaining_time": "9:48:26"} +{"current_steps": 1328, "total_steps": 4048, "loss": 0.3372647762298584, "lr": 1.607112566849577e-05, "epoch": 0.6563697022117879, "percentage": 32.81, "elapsed_time": "4:47:12", "remaining_time": "9:48:15"} +{"current_steps": 1329, "total_steps": 4048, "loss": 0.34433993697166443, "lr": 1.606463116084356e-05, "epoch": 0.6568639565056221, "percentage": 32.83, "elapsed_time": "4:47:25", "remaining_time": "9:48:02"} 
+{"current_steps": 1330, "total_steps": 4048, "loss": 0.3267759382724762, "lr": 1.6058132604530242e-05, "epoch": 0.6573582107994563, "percentage": 32.86, "elapsed_time": "4:47:39", "remaining_time": "9:47:50"} +{"current_steps": 1331, "total_steps": 4048, "loss": 0.3022347390651703, "lr": 1.6051630003894155e-05, "epoch": 0.6578524650932905, "percentage": 32.88, "elapsed_time": "4:47:52", "remaining_time": "9:47:38"} +{"current_steps": 1332, "total_steps": 4048, "loss": 0.32478266954421997, "lr": 1.604512336327634e-05, "epoch": 0.6583467193871246, "percentage": 32.91, "elapsed_time": "4:48:06", "remaining_time": "9:47:28"} +{"current_steps": 1333, "total_steps": 4048, "loss": 0.32039204239845276, "lr": 1.6038612687020548e-05, "epoch": 0.6588409736809588, "percentage": 32.93, "elapsed_time": "4:48:20", "remaining_time": "9:47:16"} +{"current_steps": 1334, "total_steps": 4048, "loss": 0.3376410901546478, "lr": 1.6032097979473203e-05, "epoch": 0.659335227974793, "percentage": 32.95, "elapsed_time": "4:48:34", "remaining_time": "9:47:05"} +{"current_steps": 1335, "total_steps": 4048, "loss": 0.28432029485702515, "lr": 1.6025579244983443e-05, "epoch": 0.6598294822686273, "percentage": 32.98, "elapsed_time": "4:48:47", "remaining_time": "9:46:53"} +{"current_steps": 1336, "total_steps": 4048, "loss": 0.3349001109600067, "lr": 1.6019056487903067e-05, "epoch": 0.6603237365624613, "percentage": 33.0, "elapsed_time": "4:49:01", "remaining_time": "9:46:43"} +{"current_steps": 1337, "total_steps": 4048, "loss": 0.27995598316192627, "lr": 1.601252971258658e-05, "epoch": 0.6608179908562956, "percentage": 33.03, "elapsed_time": "4:49:14", "remaining_time": "9:46:29"} +{"current_steps": 1338, "total_steps": 4048, "loss": 0.28326892852783203, "lr": 1.6005998923391172e-05, "epoch": 0.6613122451501298, "percentage": 33.05, "elapsed_time": "4:49:27", "remaining_time": "9:46:16"} +{"current_steps": 1339, "total_steps": 4048, "loss": 0.3139200806617737, "lr": 1.5999464124676697e-05, 
"epoch": 0.661806499443964, "percentage": 33.08, "elapsed_time": "4:49:41", "remaining_time": "9:46:04"} +{"current_steps": 1340, "total_steps": 4048, "loss": 0.32395505905151367, "lr": 1.5992925320805688e-05, "epoch": 0.6623007537377981, "percentage": 33.1, "elapsed_time": "4:49:54", "remaining_time": "9:45:52"} +{"current_steps": 1341, "total_steps": 4048, "loss": 0.35880255699157715, "lr": 1.598638251614337e-05, "epoch": 0.6627950080316323, "percentage": 33.13, "elapsed_time": "4:50:08", "remaining_time": "9:45:41"} +{"current_steps": 1342, "total_steps": 4048, "loss": 0.3696775436401367, "lr": 1.5979835715057616e-05, "epoch": 0.6632892623254665, "percentage": 33.15, "elapsed_time": "4:50:21", "remaining_time": "9:45:27"} +{"current_steps": 1343, "total_steps": 4048, "loss": 0.38413193821907043, "lr": 1.597328492191898e-05, "epoch": 0.6637835166193007, "percentage": 33.18, "elapsed_time": "4:50:34", "remaining_time": "9:45:15"} +{"current_steps": 1344, "total_steps": 4048, "loss": 0.3564830720424652, "lr": 1.596673014110068e-05, "epoch": 0.6642777709131348, "percentage": 33.2, "elapsed_time": "4:50:47", "remaining_time": "9:45:02"} +{"current_steps": 1345, "total_steps": 4048, "loss": 0.30634552240371704, "lr": 1.5960171376978587e-05, "epoch": 0.664772025206969, "percentage": 33.23, "elapsed_time": "4:51:00", "remaining_time": "9:44:50"} +{"current_steps": 1346, "total_steps": 4048, "loss": 0.27113068103790283, "lr": 1.595360863393125e-05, "epoch": 0.6652662795008032, "percentage": 33.25, "elapsed_time": "4:51:13", "remaining_time": "9:44:36"} +{"current_steps": 1347, "total_steps": 4048, "loss": 0.34015512466430664, "lr": 1.594704191633985e-05, "epoch": 0.6657605337946373, "percentage": 33.28, "elapsed_time": "4:51:27", "remaining_time": "9:44:25"} +{"current_steps": 1348, "total_steps": 4048, "loss": 0.2509229779243469, "lr": 1.594047122858824e-05, "epoch": 0.6662547880884715, "percentage": 33.3, "elapsed_time": "4:51:40", "remaining_time": "9:44:13"} 
+{"current_steps": 1349, "total_steps": 4048, "loss": 0.35122111439704895, "lr": 1.5933896575062922e-05, "epoch": 0.6667490423823057, "percentage": 33.33, "elapsed_time": "4:51:54", "remaining_time": "9:44:01"} +{"current_steps": 1350, "total_steps": 4048, "loss": 0.3656314015388489, "lr": 1.592731796015303e-05, "epoch": 0.6672432966761399, "percentage": 33.35, "elapsed_time": "4:52:07", "remaining_time": "9:43:48"} +{"current_steps": 1351, "total_steps": 4048, "loss": 0.3482551574707031, "lr": 1.5920735388250363e-05, "epoch": 0.667737550969974, "percentage": 33.37, "elapsed_time": "4:52:20", "remaining_time": "9:43:35"} +{"current_steps": 1352, "total_steps": 4048, "loss": 0.2852175831794739, "lr": 1.5914148863749344e-05, "epoch": 0.6682318052638082, "percentage": 33.4, "elapsed_time": "4:52:33", "remaining_time": "9:43:23"} +{"current_steps": 1353, "total_steps": 4048, "loss": 0.3435940742492676, "lr": 1.590755839104705e-05, "epoch": 0.6687260595576424, "percentage": 33.42, "elapsed_time": "4:52:46", "remaining_time": "9:43:10"} +{"current_steps": 1354, "total_steps": 4048, "loss": 0.34816527366638184, "lr": 1.590096397454318e-05, "epoch": 0.6692203138514766, "percentage": 33.45, "elapsed_time": "4:53:00", "remaining_time": "9:42:58"} +{"current_steps": 1355, "total_steps": 4048, "loss": 0.3283170461654663, "lr": 1.5894365618640077e-05, "epoch": 0.6697145681453107, "percentage": 33.47, "elapsed_time": "4:53:12", "remaining_time": "9:42:45"} +{"current_steps": 1356, "total_steps": 4048, "loss": 0.335905522108078, "lr": 1.588776332774271e-05, "epoch": 0.6702088224391449, "percentage": 33.5, "elapsed_time": "4:53:25", "remaining_time": "9:42:31"} +{"current_steps": 1357, "total_steps": 4048, "loss": 0.3055316209793091, "lr": 1.5881157106258666e-05, "epoch": 0.6707030767329791, "percentage": 33.52, "elapsed_time": "4:53:37", "remaining_time": "9:42:16"} +{"current_steps": 1358, "total_steps": 4048, "loss": 0.2873142659664154, "lr": 1.5874546958598172e-05, "epoch": 
0.6711973310268133, "percentage": 33.55, "elapsed_time": "4:53:50", "remaining_time": "9:42:03"} +{"current_steps": 1359, "total_steps": 4048, "loss": 0.29659712314605713, "lr": 1.586793288917406e-05, "epoch": 0.6716915853206474, "percentage": 33.57, "elapsed_time": "4:54:02", "remaining_time": "9:41:49"} +{"current_steps": 1360, "total_steps": 4048, "loss": 0.33081990480422974, "lr": 1.5861314902401802e-05, "epoch": 0.6721858396144816, "percentage": 33.6, "elapsed_time": "4:54:15", "remaining_time": "9:41:35"} +{"current_steps": 1361, "total_steps": 4048, "loss": 0.3559015691280365, "lr": 1.5854693002699457e-05, "epoch": 0.6726800939083158, "percentage": 33.62, "elapsed_time": "4:54:27", "remaining_time": "9:41:21"} +{"current_steps": 1362, "total_steps": 4048, "loss": 0.3353438973426819, "lr": 1.584806719448772e-05, "epoch": 0.67317434820215, "percentage": 33.65, "elapsed_time": "4:54:40", "remaining_time": "9:41:07"} +{"current_steps": 1363, "total_steps": 4048, "loss": 0.3320685923099518, "lr": 1.5841437482189882e-05, "epoch": 0.6736686024959841, "percentage": 33.67, "elapsed_time": "4:54:52", "remaining_time": "9:40:52"} +{"current_steps": 1364, "total_steps": 4048, "loss": 0.3070179224014282, "lr": 1.5834803870231846e-05, "epoch": 0.6741628567898184, "percentage": 33.7, "elapsed_time": "4:55:05", "remaining_time": "9:40:39"} +{"current_steps": 1365, "total_steps": 4048, "loss": 0.28779780864715576, "lr": 1.5828166363042115e-05, "epoch": 0.6746571110836526, "percentage": 33.72, "elapsed_time": "4:55:17", "remaining_time": "9:40:24"} +{"current_steps": 1366, "total_steps": 4048, "loss": 0.2793114185333252, "lr": 1.5821524965051793e-05, "epoch": 0.6751513653774868, "percentage": 33.75, "elapsed_time": "4:55:30", "remaining_time": "9:40:11"} +{"current_steps": 1367, "total_steps": 4048, "loss": 0.3586357831954956, "lr": 1.5814879680694585e-05, "epoch": 0.6756456196713209, "percentage": 33.77, "elapsed_time": "4:55:42", "remaining_time": "9:39:56"} 
+{"current_steps": 1368, "total_steps": 4048, "loss": 0.35258832573890686, "lr": 1.5808230514406786e-05, "epoch": 0.6761398739651551, "percentage": 33.79, "elapsed_time": "4:55:55", "remaining_time": "9:39:43"} +{"current_steps": 1369, "total_steps": 4048, "loss": 0.2783607840538025, "lr": 1.5801577470627286e-05, "epoch": 0.6766341282589893, "percentage": 33.82, "elapsed_time": "4:56:07", "remaining_time": "9:39:29"} +{"current_steps": 1370, "total_steps": 4048, "loss": 0.3494858741760254, "lr": 1.579492055379756e-05, "epoch": 0.6771283825528235, "percentage": 33.84, "elapsed_time": "4:56:20", "remaining_time": "9:39:15"} +{"current_steps": 1371, "total_steps": 4048, "loss": 0.34512561559677124, "lr": 1.578825976836167e-05, "epoch": 0.6776226368466576, "percentage": 33.87, "elapsed_time": "4:56:33", "remaining_time": "9:39:02"} +{"current_steps": 1372, "total_steps": 4048, "loss": 0.2923341989517212, "lr": 1.5781595118766265e-05, "epoch": 0.6781168911404918, "percentage": 33.89, "elapsed_time": "4:56:45", "remaining_time": "9:38:48"} +{"current_steps": 1373, "total_steps": 4048, "loss": 0.3078833818435669, "lr": 1.5774926609460566e-05, "epoch": 0.678611145434326, "percentage": 33.92, "elapsed_time": "4:56:58", "remaining_time": "9:38:34"} +{"current_steps": 1374, "total_steps": 4048, "loss": 0.3147008419036865, "lr": 1.576825424489638e-05, "epoch": 0.6791053997281601, "percentage": 33.94, "elapsed_time": "4:57:10", "remaining_time": "9:38:20"} +{"current_steps": 1375, "total_steps": 4048, "loss": 0.2907789349555969, "lr": 1.576157802952807e-05, "epoch": 0.6795996540219943, "percentage": 33.97, "elapsed_time": "4:57:23", "remaining_time": "9:38:07"} +{"current_steps": 1376, "total_steps": 4048, "loss": 0.2941555976867676, "lr": 1.57548979678126e-05, "epoch": 0.6800939083158285, "percentage": 33.99, "elapsed_time": "4:57:35", "remaining_time": "9:37:52"} +{"current_steps": 1377, "total_steps": 4048, "loss": 0.3452342748641968, "lr": 1.5748214064209473e-05, "epoch": 
0.6805881626096627, "percentage": 34.02, "elapsed_time": "4:57:48", "remaining_time": "9:37:39"} +{"current_steps": 1378, "total_steps": 4048, "loss": 0.31481361389160156, "lr": 1.5741526323180765e-05, "epoch": 0.6810824169034968, "percentage": 34.04, "elapsed_time": "4:58:00", "remaining_time": "9:37:25"} +{"current_steps": 1379, "total_steps": 4048, "loss": 0.3403349220752716, "lr": 1.573483474919112e-05, "epoch": 0.681576671197331, "percentage": 34.07, "elapsed_time": "4:58:13", "remaining_time": "9:37:11"} +{"current_steps": 1380, "total_steps": 4048, "loss": 0.3283364176750183, "lr": 1.572813934670774e-05, "epoch": 0.6820709254911652, "percentage": 34.09, "elapsed_time": "4:58:25", "remaining_time": "9:36:57"} +{"current_steps": 1381, "total_steps": 4048, "loss": 0.3294883966445923, "lr": 1.5721440120200376e-05, "epoch": 0.6825651797849994, "percentage": 34.12, "elapsed_time": "4:58:37", "remaining_time": "9:36:43"} +{"current_steps": 1382, "total_steps": 4048, "loss": 0.3087981343269348, "lr": 1.5714737074141338e-05, "epoch": 0.6830594340788335, "percentage": 34.14, "elapsed_time": "4:58:50", "remaining_time": "9:36:29"} +{"current_steps": 1383, "total_steps": 4048, "loss": 0.29511693120002747, "lr": 1.570803021300548e-05, "epoch": 0.6835536883726677, "percentage": 34.17, "elapsed_time": "4:59:02", "remaining_time": "9:36:15"} +{"current_steps": 1384, "total_steps": 4048, "loss": 0.3620823323726654, "lr": 1.570131954127021e-05, "epoch": 0.6840479426665019, "percentage": 34.19, "elapsed_time": "4:59:15", "remaining_time": "9:36:01"} +{"current_steps": 1385, "total_steps": 4048, "loss": 0.3978300988674164, "lr": 1.5694605063415477e-05, "epoch": 0.6845421969603361, "percentage": 34.21, "elapsed_time": "4:59:27", "remaining_time": "9:35:47"} +{"current_steps": 1386, "total_steps": 4048, "loss": 0.35367661714553833, "lr": 1.5687886783923773e-05, "epoch": 0.6850364512541702, "percentage": 34.24, "elapsed_time": "4:59:40", "remaining_time": "9:35:34"} 
+{"current_steps": 1387, "total_steps": 4048, "loss": 0.3313448131084442, "lr": 1.5681164707280117e-05, "epoch": 0.6855307055480044, "percentage": 34.26, "elapsed_time": "4:59:53", "remaining_time": "9:35:21"} +{"current_steps": 1388, "total_steps": 4048, "loss": 0.34115713834762573, "lr": 1.5674438837972077e-05, "epoch": 0.6860249598418386, "percentage": 34.29, "elapsed_time": "5:00:07", "remaining_time": "9:35:10"} +{"current_steps": 1389, "total_steps": 4048, "loss": 0.311326265335083, "lr": 1.566770918048975e-05, "epoch": 0.6865192141356729, "percentage": 34.31, "elapsed_time": "5:00:21", "remaining_time": "9:34:58"} +{"current_steps": 1390, "total_steps": 4048, "loss": 0.32622700929641724, "lr": 1.5660975739325755e-05, "epoch": 0.687013468429507, "percentage": 34.34, "elapsed_time": "5:00:34", "remaining_time": "9:34:46"} +{"current_steps": 1391, "total_steps": 4048, "loss": 0.36029747128486633, "lr": 1.565423851897524e-05, "epoch": 0.6875077227233412, "percentage": 34.36, "elapsed_time": "5:00:47", "remaining_time": "9:34:33"} +{"current_steps": 1392, "total_steps": 4048, "loss": 0.2771177291870117, "lr": 1.5647497523935883e-05, "epoch": 0.6880019770171754, "percentage": 34.39, "elapsed_time": "5:01:00", "remaining_time": "9:34:21"} +{"current_steps": 1393, "total_steps": 4048, "loss": 0.3474002182483673, "lr": 1.5640752758707868e-05, "epoch": 0.6884962313110096, "percentage": 34.41, "elapsed_time": "5:01:15", "remaining_time": "9:34:10"} +{"current_steps": 1394, "total_steps": 4048, "loss": 0.28006255626678467, "lr": 1.563400422779391e-05, "epoch": 0.6889904856048437, "percentage": 34.44, "elapsed_time": "5:01:28", "remaining_time": "9:33:57"} +{"current_steps": 1395, "total_steps": 4048, "loss": 0.32151490449905396, "lr": 1.562725193569923e-05, "epoch": 0.6894847398986779, "percentage": 34.46, "elapsed_time": "5:01:42", "remaining_time": "9:33:46"} +{"current_steps": 1396, "total_steps": 4048, "loss": 0.3081187903881073, "lr": 1.5620495886931557e-05, 
"epoch": 0.6899789941925121, "percentage": 34.49, "elapsed_time": "5:01:55", "remaining_time": "9:33:33"} +{"current_steps": 1397, "total_steps": 4048, "loss": 0.3158992826938629, "lr": 1.561373608600114e-05, "epoch": 0.6904732484863463, "percentage": 34.51, "elapsed_time": "5:02:07", "remaining_time": "9:33:20"} +{"current_steps": 1398, "total_steps": 4048, "loss": 0.33790335059165955, "lr": 1.5606972537420723e-05, "epoch": 0.6909675027801804, "percentage": 34.54, "elapsed_time": "5:02:20", "remaining_time": "9:33:07"} +{"current_steps": 1399, "total_steps": 4048, "loss": 0.3157292902469635, "lr": 1.5600205245705553e-05, "epoch": 0.6914617570740146, "percentage": 34.56, "elapsed_time": "5:02:33", "remaining_time": "9:32:54"} +{"current_steps": 1400, "total_steps": 4048, "loss": 0.31090572476387024, "lr": 1.559343421537338e-05, "epoch": 0.6919560113678488, "percentage": 34.58, "elapsed_time": "5:02:46", "remaining_time": "9:32:40"} +{"current_steps": 1401, "total_steps": 4048, "loss": 0.30499958992004395, "lr": 1.5586659450944443e-05, "epoch": 0.6924502656616829, "percentage": 34.61, "elapsed_time": "5:03:05", "remaining_time": "9:32:38"} +{"current_steps": 1402, "total_steps": 4048, "loss": 0.3036794662475586, "lr": 1.5579880956941478e-05, "epoch": 0.6929445199555171, "percentage": 34.63, "elapsed_time": "5:03:17", "remaining_time": "9:32:24"} +{"current_steps": 1403, "total_steps": 4048, "loss": 0.26514700055122375, "lr": 1.5573098737889716e-05, "epoch": 0.6934387742493513, "percentage": 34.66, "elapsed_time": "5:03:30", "remaining_time": "9:32:12"} +{"current_steps": 1404, "total_steps": 4048, "loss": 0.31947457790374756, "lr": 1.5566312798316867e-05, "epoch": 0.6939330285431855, "percentage": 34.68, "elapsed_time": "5:03:43", "remaining_time": "9:31:58"} +{"current_steps": 1405, "total_steps": 4048, "loss": 0.29387322068214417, "lr": 1.5559523142753124e-05, "epoch": 0.6944272828370196, "percentage": 34.71, "elapsed_time": "5:03:57", "remaining_time": "9:31:46"} 
+{"current_steps": 1406, "total_steps": 4048, "loss": 0.33459946513175964, "lr": 1.555272977573117e-05, "epoch": 0.6949215371308538, "percentage": 34.73, "elapsed_time": "5:04:09", "remaining_time": "9:31:32"} +{"current_steps": 1407, "total_steps": 4048, "loss": 0.31394320726394653, "lr": 1.5545932701786154e-05, "epoch": 0.695415791424688, "percentage": 34.76, "elapsed_time": "5:04:22", "remaining_time": "9:31:18"} +{"current_steps": 1408, "total_steps": 4048, "loss": 0.2891885042190552, "lr": 1.5539131925455713e-05, "epoch": 0.6959100457185222, "percentage": 34.78, "elapsed_time": "5:04:34", "remaining_time": "9:31:05"} +{"current_steps": 1409, "total_steps": 4048, "loss": 0.33686599135398865, "lr": 1.5532327451279938e-05, "epoch": 0.6964043000123563, "percentage": 34.81, "elapsed_time": "5:04:47", "remaining_time": "9:30:51"} +{"current_steps": 1410, "total_steps": 4048, "loss": 0.31463146209716797, "lr": 1.5525519283801405e-05, "epoch": 0.6968985543061905, "percentage": 34.83, "elapsed_time": "5:05:00", "remaining_time": "9:30:39"} +{"current_steps": 1411, "total_steps": 4048, "loss": 0.3598940372467041, "lr": 1.5518707427565146e-05, "epoch": 0.6973928086000247, "percentage": 34.86, "elapsed_time": "5:05:13", "remaining_time": "9:30:26"} +{"current_steps": 1412, "total_steps": 4048, "loss": 0.32980066537857056, "lr": 1.5511891887118665e-05, "epoch": 0.6978870628938589, "percentage": 34.88, "elapsed_time": "5:05:27", "remaining_time": "9:30:14"} +{"current_steps": 1413, "total_steps": 4048, "loss": 0.3264961242675781, "lr": 1.5505072667011915e-05, "epoch": 0.698381317187693, "percentage": 34.91, "elapsed_time": "5:05:40", "remaining_time": "9:30:01"} +{"current_steps": 1414, "total_steps": 4048, "loss": 0.3355519771575928, "lr": 1.549824977179731e-05, "epoch": 0.6988755714815272, "percentage": 34.93, "elapsed_time": "5:05:53", "remaining_time": "9:29:48"} +{"current_steps": 1415, "total_steps": 4048, "loss": 0.27073174715042114, "lr": 1.5491423206029717e-05, 
"epoch": 0.6993698257753614, "percentage": 34.96, "elapsed_time": "5:06:06", "remaining_time": "9:29:36"} +{"current_steps": 1416, "total_steps": 4048, "loss": 0.32638323307037354, "lr": 1.5484592974266456e-05, "epoch": 0.6998640800691956, "percentage": 34.98, "elapsed_time": "5:06:19", "remaining_time": "9:29:23"} +{"current_steps": 1417, "total_steps": 4048, "loss": 0.38844019174575806, "lr": 1.5477759081067288e-05, "epoch": 0.7003583343630297, "percentage": 35.0, "elapsed_time": "5:06:32", "remaining_time": "9:29:10"} +{"current_steps": 1418, "total_steps": 4048, "loss": 0.3386498689651489, "lr": 1.5470921530994426e-05, "epoch": 0.700852588656864, "percentage": 35.03, "elapsed_time": "5:06:46", "remaining_time": "9:28:59"} +{"current_steps": 1419, "total_steps": 4048, "loss": 0.3304392993450165, "lr": 1.5464080328612522e-05, "epoch": 0.7013468429506982, "percentage": 35.05, "elapsed_time": "5:06:59", "remaining_time": "9:28:46"} +{"current_steps": 1420, "total_steps": 4048, "loss": 0.314837247133255, "lr": 1.545723547848866e-05, "epoch": 0.7018410972445324, "percentage": 35.08, "elapsed_time": "5:07:13", "remaining_time": "9:28:34"} +{"current_steps": 1421, "total_steps": 4048, "loss": 0.30135127902030945, "lr": 1.5450386985192368e-05, "epoch": 0.7023353515383665, "percentage": 35.1, "elapsed_time": "5:07:26", "remaining_time": "9:28:21"} +{"current_steps": 1422, "total_steps": 4048, "loss": 0.29176798462867737, "lr": 1.5443534853295602e-05, "epoch": 0.7028296058322007, "percentage": 35.13, "elapsed_time": "5:07:38", "remaining_time": "9:28:07"} +{"current_steps": 1423, "total_steps": 4048, "loss": 0.36438125371932983, "lr": 1.5436679087372746e-05, "epoch": 0.7033238601260349, "percentage": 35.15, "elapsed_time": "5:07:51", "remaining_time": "9:27:54"} +{"current_steps": 1424, "total_steps": 4048, "loss": 0.37140434980392456, "lr": 1.542981969200061e-05, "epoch": 0.703818114419869, "percentage": 35.18, "elapsed_time": "5:08:04", "remaining_time": "9:27:40"} 
+{"current_steps": 1425, "total_steps": 4048, "loss": 0.36072903871536255, "lr": 1.542295667175843e-05, "epoch": 0.7043123687137032, "percentage": 35.2, "elapsed_time": "5:08:16", "remaining_time": "9:27:27"} +{"current_steps": 1426, "total_steps": 4048, "loss": 0.3266327977180481, "lr": 1.5416090031227868e-05, "epoch": 0.7048066230075374, "percentage": 35.23, "elapsed_time": "5:08:29", "remaining_time": "9:27:12"} +{"current_steps": 1427, "total_steps": 4048, "loss": 0.3081423342227936, "lr": 1.5409219774992978e-05, "epoch": 0.7053008773013716, "percentage": 35.25, "elapsed_time": "5:08:42", "remaining_time": "9:26:59"} +{"current_steps": 1428, "total_steps": 4048, "loss": 0.3571197986602783, "lr": 1.5402345907640262e-05, "epoch": 0.7057951315952057, "percentage": 35.28, "elapsed_time": "5:08:54", "remaining_time": "9:26:45"} +{"current_steps": 1429, "total_steps": 4048, "loss": 0.32380104064941406, "lr": 1.5395468433758604e-05, "epoch": 0.7062893858890399, "percentage": 35.3, "elapsed_time": "5:09:07", "remaining_time": "9:26:32"} +{"current_steps": 1430, "total_steps": 4048, "loss": 0.33777546882629395, "lr": 1.5388587357939313e-05, "epoch": 0.7067836401828741, "percentage": 35.33, "elapsed_time": "5:09:19", "remaining_time": "9:26:18"} +{"current_steps": 1431, "total_steps": 4048, "loss": 0.31793370842933655, "lr": 1.5381702684776093e-05, "epoch": 0.7072778944767083, "percentage": 35.35, "elapsed_time": "5:09:32", "remaining_time": "9:26:05"} +{"current_steps": 1432, "total_steps": 4048, "loss": 0.3282355070114136, "lr": 1.537481441886506e-05, "epoch": 0.7077721487705424, "percentage": 35.38, "elapsed_time": "5:09:45", "remaining_time": "9:25:52"} +{"current_steps": 1433, "total_steps": 4048, "loss": 0.3523057699203491, "lr": 1.5367922564804716e-05, "epoch": 0.7082664030643766, "percentage": 35.4, "elapsed_time": "5:09:58", "remaining_time": "9:25:39"} +{"current_steps": 1434, "total_steps": 4048, "loss": 0.36351460218429565, "lr": 1.5361027127195964e-05, 
"epoch": 0.7087606573582108, "percentage": 35.42, "elapsed_time": "5:10:12", "remaining_time": "9:25:27"} +{"current_steps": 1435, "total_steps": 4048, "loss": 0.2936401963233948, "lr": 1.5354128110642102e-05, "epoch": 0.709254911652045, "percentage": 35.45, "elapsed_time": "5:10:25", "remaining_time": "9:25:15"} +{"current_steps": 1436, "total_steps": 4048, "loss": 0.3178175091743469, "lr": 1.5347225519748818e-05, "epoch": 0.7097491659458791, "percentage": 35.47, "elapsed_time": "5:10:38", "remaining_time": "9:25:02"} +{"current_steps": 1437, "total_steps": 4048, "loss": 0.3098832666873932, "lr": 1.5340319359124177e-05, "epoch": 0.7102434202397133, "percentage": 35.5, "elapsed_time": "5:10:52", "remaining_time": "9:24:50"} +{"current_steps": 1438, "total_steps": 4048, "loss": 0.2644941806793213, "lr": 1.5333409633378633e-05, "epoch": 0.7107376745335475, "percentage": 35.52, "elapsed_time": "5:11:04", "remaining_time": "9:24:36"} +{"current_steps": 1439, "total_steps": 4048, "loss": 0.3046286702156067, "lr": 1.5326496347125027e-05, "epoch": 0.7112319288273817, "percentage": 35.55, "elapsed_time": "5:11:17", "remaining_time": "9:24:23"} +{"current_steps": 1440, "total_steps": 4048, "loss": 0.33757925033569336, "lr": 1.5319579504978567e-05, "epoch": 0.7117261831212158, "percentage": 35.57, "elapsed_time": "5:11:29", "remaining_time": "9:24:09"} +{"current_steps": 1441, "total_steps": 4048, "loss": 0.3470202684402466, "lr": 1.5312659111556832e-05, "epoch": 0.71222043741505, "percentage": 35.6, "elapsed_time": "5:11:42", "remaining_time": "9:23:55"} +{"current_steps": 1442, "total_steps": 4048, "loss": 0.3310868740081787, "lr": 1.5305735171479785e-05, "epoch": 0.7127146917088842, "percentage": 35.62, "elapsed_time": "5:11:55", "remaining_time": "9:23:42"} +{"current_steps": 1443, "total_steps": 4048, "loss": 0.31649407744407654, "lr": 1.529880768936975e-05, "epoch": 0.7132089460027184, "percentage": 35.65, "elapsed_time": "5:12:08", "remaining_time": "9:23:29"} 
+{"current_steps": 1444, "total_steps": 4048, "loss": 0.2986135184764862, "lr": 1.5291876669851408e-05, "epoch": 0.7137032002965525, "percentage": 35.67, "elapsed_time": "5:12:21", "remaining_time": "9:23:17"} +{"current_steps": 1445, "total_steps": 4048, "loss": 0.3033408224582672, "lr": 1.5284942117551817e-05, "epoch": 0.7141974545903867, "percentage": 35.7, "elapsed_time": "5:12:35", "remaining_time": "9:23:05"} +{"current_steps": 1446, "total_steps": 4048, "loss": 0.34231680631637573, "lr": 1.5278004037100378e-05, "epoch": 0.714691708884221, "percentage": 35.72, "elapsed_time": "5:12:48", "remaining_time": "9:22:52"} +{"current_steps": 1447, "total_steps": 4048, "loss": 0.3273579478263855, "lr": 1.5271062433128857e-05, "epoch": 0.7151859631780552, "percentage": 35.75, "elapsed_time": "5:13:00", "remaining_time": "9:22:38"} +{"current_steps": 1448, "total_steps": 4048, "loss": 0.344064861536026, "lr": 1.5264117310271372e-05, "epoch": 0.7156802174718893, "percentage": 35.77, "elapsed_time": "5:13:13", "remaining_time": "9:22:25"} +{"current_steps": 1449, "total_steps": 4048, "loss": 0.3131038546562195, "lr": 1.5257168673164384e-05, "epoch": 0.7161744717657235, "percentage": 35.8, "elapsed_time": "5:13:26", "remaining_time": "9:22:12"} +{"current_steps": 1450, "total_steps": 4048, "loss": 0.32794755697250366, "lr": 1.5250216526446708e-05, "epoch": 0.7166687260595577, "percentage": 35.82, "elapsed_time": "5:13:39", "remaining_time": "9:21:59"} +{"current_steps": 1451, "total_steps": 4048, "loss": 0.3633842468261719, "lr": 1.5243260874759494e-05, "epoch": 0.7171629803533918, "percentage": 35.84, "elapsed_time": "5:13:51", "remaining_time": "9:21:45"} +{"current_steps": 1452, "total_steps": 4048, "loss": 0.24650251865386963, "lr": 1.5236301722746235e-05, "epoch": 0.717657234647226, "percentage": 35.87, "elapsed_time": "5:14:04", "remaining_time": "9:21:31"} +{"current_steps": 1453, "total_steps": 4048, "loss": 0.34167230129241943, "lr": 1.5229339075052769e-05, 
"epoch": 0.7181514889410602, "percentage": 35.89, "elapsed_time": "5:14:17", "remaining_time": "9:21:19"} +{"current_steps": 1454, "total_steps": 4048, "loss": 0.29454126954078674, "lr": 1.522237293632725e-05, "epoch": 0.7186457432348944, "percentage": 35.92, "elapsed_time": "5:14:30", "remaining_time": "9:21:06"} +{"current_steps": 1455, "total_steps": 4048, "loss": 0.3709314465522766, "lr": 1.5215403311220178e-05, "epoch": 0.7191399975287285, "percentage": 35.94, "elapsed_time": "5:14:44", "remaining_time": "9:20:54"} +{"current_steps": 1456, "total_steps": 4048, "loss": 0.3543916642665863, "lr": 1.5208430204384377e-05, "epoch": 0.7196342518225627, "percentage": 35.97, "elapsed_time": "5:14:57", "remaining_time": "9:20:41"} +{"current_steps": 1457, "total_steps": 4048, "loss": 0.33827707171440125, "lr": 1.5201453620474986e-05, "epoch": 0.7201285061163969, "percentage": 35.99, "elapsed_time": "5:15:11", "remaining_time": "9:20:29"} +{"current_steps": 1458, "total_steps": 4048, "loss": 0.31289514899253845, "lr": 1.5194473564149484e-05, "epoch": 0.7206227604102311, "percentage": 36.02, "elapsed_time": "5:15:24", "remaining_time": "9:20:17"} +{"current_steps": 1459, "total_steps": 4048, "loss": 0.3345657289028168, "lr": 1.5187490040067646e-05, "epoch": 0.7211170147040652, "percentage": 36.04, "elapsed_time": "5:15:37", "remaining_time": "9:20:05"} +{"current_steps": 1460, "total_steps": 4048, "loss": 0.3322404623031616, "lr": 1.5180503052891578e-05, "epoch": 0.7216112689978994, "percentage": 36.07, "elapsed_time": "5:15:51", "remaining_time": "9:19:52"} +{"current_steps": 1461, "total_steps": 4048, "loss": 0.31120461225509644, "lr": 1.5173512607285692e-05, "epoch": 0.7221055232917336, "percentage": 36.09, "elapsed_time": "5:16:04", "remaining_time": "9:19:41"} +{"current_steps": 1462, "total_steps": 4048, "loss": 0.3388645648956299, "lr": 1.5166518707916714e-05, "epoch": 0.7225997775855678, "percentage": 36.12, "elapsed_time": "5:16:18", "remaining_time": "9:19:28"} 
+{"current_steps": 1463, "total_steps": 4048, "loss": 0.3048557639122009, "lr": 1.5159521359453661e-05, "epoch": 0.7230940318794019, "percentage": 36.14, "elapsed_time": "5:16:31", "remaining_time": "9:19:16"} +{"current_steps": 1464, "total_steps": 4048, "loss": 0.32128047943115234, "lr": 1.5152520566567873e-05, "epoch": 0.7235882861732361, "percentage": 36.17, "elapsed_time": "5:16:45", "remaining_time": "9:19:05"} +{"current_steps": 1465, "total_steps": 4048, "loss": 0.3016900420188904, "lr": 1.5145516333932973e-05, "epoch": 0.7240825404670703, "percentage": 36.19, "elapsed_time": "5:16:59", "remaining_time": "9:18:53"} +{"current_steps": 1466, "total_steps": 4048, "loss": 0.27410340309143066, "lr": 1.5138508666224892e-05, "epoch": 0.7245767947609045, "percentage": 36.22, "elapsed_time": "5:17:13", "remaining_time": "9:18:42"} +{"current_steps": 1467, "total_steps": 4048, "loss": 0.314311146736145, "lr": 1.513149756812184e-05, "epoch": 0.7250710490547386, "percentage": 36.24, "elapsed_time": "5:17:27", "remaining_time": "9:18:30"} +{"current_steps": 1468, "total_steps": 4048, "loss": 0.300488144159317, "lr": 1.5124483044304339e-05, "epoch": 0.7255653033485728, "percentage": 36.26, "elapsed_time": "5:17:41", "remaining_time": "9:18:19"} +{"current_steps": 1469, "total_steps": 4048, "loss": 0.2610424757003784, "lr": 1.5117465099455173e-05, "epoch": 0.726059557642407, "percentage": 36.29, "elapsed_time": "5:17:54", "remaining_time": "9:18:08"} +{"current_steps": 1470, "total_steps": 4048, "loss": 0.2631368637084961, "lr": 1.5110443738259425e-05, "epoch": 0.7265538119362412, "percentage": 36.31, "elapsed_time": "5:18:08", "remaining_time": "9:17:56"} +{"current_steps": 1471, "total_steps": 4048, "loss": 0.2894716262817383, "lr": 1.510341896540446e-05, "epoch": 0.7270480662300753, "percentage": 36.34, "elapsed_time": "5:18:21", "remaining_time": "9:17:43"} +{"current_steps": 1472, "total_steps": 4048, "loss": 0.2859206199645996, "lr": 1.5096390785579913e-05, "epoch": 
0.7275423205239095, "percentage": 36.36, "elapsed_time": "5:18:35", "remaining_time": "9:17:31"} +{"current_steps": 1473, "total_steps": 4048, "loss": 0.2966008484363556, "lr": 1.5089359203477693e-05, "epoch": 0.7280365748177438, "percentage": 36.39, "elapsed_time": "5:18:48", "remaining_time": "9:17:18"} +{"current_steps": 1474, "total_steps": 4048, "loss": 0.3187675476074219, "lr": 1.5082324223791988e-05, "epoch": 0.728530829111578, "percentage": 36.41, "elapsed_time": "5:19:02", "remaining_time": "9:17:07"} +{"current_steps": 1475, "total_steps": 4048, "loss": 0.32434171438217163, "lr": 1.507528585121925e-05, "epoch": 0.729025083405412, "percentage": 36.44, "elapsed_time": "5:19:15", "remaining_time": "9:16:54"} +{"current_steps": 1476, "total_steps": 4048, "loss": 0.3518364429473877, "lr": 1.5068244090458197e-05, "epoch": 0.7295193376992463, "percentage": 36.46, "elapsed_time": "5:19:28", "remaining_time": "9:16:42"} +{"current_steps": 1477, "total_steps": 4048, "loss": 0.32294291257858276, "lr": 1.50611989462098e-05, "epoch": 0.7300135919930805, "percentage": 36.49, "elapsed_time": "5:19:42", "remaining_time": "9:16:30"} +{"current_steps": 1478, "total_steps": 4048, "loss": 0.3413415253162384, "lr": 1.5054150423177307e-05, "epoch": 0.7305078462869146, "percentage": 36.51, "elapsed_time": "5:19:56", "remaining_time": "9:16:19"} +{"current_steps": 1479, "total_steps": 4048, "loss": 0.3562566637992859, "lr": 1.5047098526066207e-05, "epoch": 0.7310021005807488, "percentage": 36.54, "elapsed_time": "5:20:09", "remaining_time": "9:16:06"} +{"current_steps": 1480, "total_steps": 4048, "loss": 0.30018410086631775, "lr": 1.504004325958424e-05, "epoch": 0.731496354874583, "percentage": 36.56, "elapsed_time": "5:20:23", "remaining_time": "9:15:54"} +{"current_steps": 1481, "total_steps": 4048, "loss": 0.2937701344490051, "lr": 1.5032984628441409e-05, "epoch": 0.7319906091684172, "percentage": 36.59, "elapsed_time": "5:20:36", "remaining_time": "9:15:42"} 
+{"current_steps": 1482, "total_steps": 4048, "loss": 0.3268740773200989, "lr": 1.5025922637349953e-05, "epoch": 0.7324848634622513, "percentage": 36.61, "elapsed_time": "5:20:49", "remaining_time": "9:15:30"} +{"current_steps": 1483, "total_steps": 4048, "loss": 0.3246314525604248, "lr": 1.5018857291024356e-05, "epoch": 0.7329791177560855, "percentage": 36.64, "elapsed_time": "5:21:03", "remaining_time": "9:15:18"} +{"current_steps": 1484, "total_steps": 4048, "loss": 0.276904433965683, "lr": 1.501178859418134e-05, "epoch": 0.7334733720499197, "percentage": 36.66, "elapsed_time": "5:21:16", "remaining_time": "9:15:04"} +{"current_steps": 1485, "total_steps": 4048, "loss": 0.27665287256240845, "lr": 1.5004716551539873e-05, "epoch": 0.7339676263437539, "percentage": 36.68, "elapsed_time": "5:21:29", "remaining_time": "9:14:51"} +{"current_steps": 1486, "total_steps": 4048, "loss": 0.325985848903656, "lr": 1.4997641167821143e-05, "epoch": 0.734461880637588, "percentage": 36.71, "elapsed_time": "5:21:41", "remaining_time": "9:14:37"} +{"current_steps": 1487, "total_steps": 4048, "loss": 0.2951817214488983, "lr": 1.4990562447748573e-05, "epoch": 0.7349561349314222, "percentage": 36.73, "elapsed_time": "5:21:55", "remaining_time": "9:14:26"} +{"current_steps": 1488, "total_steps": 4048, "loss": 0.2592772841453552, "lr": 1.4983480396047822e-05, "epoch": 0.7354503892252564, "percentage": 36.76, "elapsed_time": "5:22:08", "remaining_time": "9:14:12"} +{"current_steps": 1489, "total_steps": 4048, "loss": 0.3278253674507141, "lr": 1.4976395017446767e-05, "epoch": 0.7359446435190906, "percentage": 36.78, "elapsed_time": "5:22:21", "remaining_time": "9:13:59"} +{"current_steps": 1490, "total_steps": 4048, "loss": 0.32366445660591125, "lr": 1.4969306316675497e-05, "epoch": 0.7364388978129247, "percentage": 36.81, "elapsed_time": "5:22:33", "remaining_time": "9:13:45"} +{"current_steps": 1491, "total_steps": 4048, "loss": 0.30544513463974, "lr": 1.4962214298466337e-05, "epoch": 
0.7369331521067589, "percentage": 36.83, "elapsed_time": "5:22:46", "remaining_time": "9:13:32"} +{"current_steps": 1492, "total_steps": 4048, "loss": 0.3712898790836334, "lr": 1.4955118967553812e-05, "epoch": 0.7374274064005931, "percentage": 36.86, "elapsed_time": "5:22:59", "remaining_time": "9:13:19"} +{"current_steps": 1493, "total_steps": 4048, "loss": 0.3006438612937927, "lr": 1.4948020328674662e-05, "epoch": 0.7379216606944273, "percentage": 36.88, "elapsed_time": "5:23:11", "remaining_time": "9:13:05"} +{"current_steps": 1494, "total_steps": 4048, "loss": 0.3494953215122223, "lr": 1.494091838656784e-05, "epoch": 0.7384159149882614, "percentage": 36.91, "elapsed_time": "5:23:24", "remaining_time": "9:12:52"} +{"current_steps": 1495, "total_steps": 4048, "loss": 0.2698785662651062, "lr": 1.4933813145974504e-05, "epoch": 0.7389101692820956, "percentage": 36.93, "elapsed_time": "5:23:37", "remaining_time": "9:12:38"} +{"current_steps": 1496, "total_steps": 4048, "loss": 0.34775635600090027, "lr": 1.4926704611638003e-05, "epoch": 0.7394044235759298, "percentage": 36.96, "elapsed_time": "5:23:50", "remaining_time": "9:12:25"} +{"current_steps": 1497, "total_steps": 4048, "loss": 0.328175812959671, "lr": 1.4919592788303898e-05, "epoch": 0.739898677869764, "percentage": 36.98, "elapsed_time": "5:24:02", "remaining_time": "9:12:11"} +{"current_steps": 1498, "total_steps": 4048, "loss": 0.3320178687572479, "lr": 1.491247768071994e-05, "epoch": 0.7403929321635981, "percentage": 37.01, "elapsed_time": "5:24:15", "remaining_time": "9:11:58"} +{"current_steps": 1499, "total_steps": 4048, "loss": 0.308150053024292, "lr": 1.4905359293636074e-05, "epoch": 0.7408871864574323, "percentage": 37.03, "elapsed_time": "5:24:28", "remaining_time": "9:11:44"} +{"current_steps": 1500, "total_steps": 4048, "loss": 0.3311570882797241, "lr": 1.489823763180443e-05, "epoch": 0.7413814407512666, "percentage": 37.06, "elapsed_time": "5:24:40", "remaining_time": "9:11:31"} 
+{"current_steps": 1501, "total_steps": 4048, "loss": 0.36916327476501465, "lr": 1.4891112699979334e-05, "epoch": 0.7418756950451008, "percentage": 37.08, "elapsed_time": "5:24:58", "remaining_time": "9:11:25"} +{"current_steps": 1502, "total_steps": 4048, "loss": 0.28005337715148926, "lr": 1.4883984502917286e-05, "epoch": 0.7423699493389349, "percentage": 37.1, "elapsed_time": "5:25:10", "remaining_time": "9:11:12"} +{"current_steps": 1503, "total_steps": 4048, "loss": 0.3502781391143799, "lr": 1.4876853045376962e-05, "epoch": 0.7428642036327691, "percentage": 37.13, "elapsed_time": "5:25:22", "remaining_time": "9:10:57"} +{"current_steps": 1504, "total_steps": 4048, "loss": 0.32032880187034607, "lr": 1.4869718332119232e-05, "epoch": 0.7433584579266033, "percentage": 37.15, "elapsed_time": "5:25:35", "remaining_time": "9:10:43"} +{"current_steps": 1505, "total_steps": 4048, "loss": 0.3229472041130066, "lr": 1.4862580367907118e-05, "epoch": 0.7438527122204374, "percentage": 37.18, "elapsed_time": "5:25:47", "remaining_time": "9:10:29"} +{"current_steps": 1506, "total_steps": 4048, "loss": 0.2725368142127991, "lr": 1.4855439157505833e-05, "epoch": 0.7443469665142716, "percentage": 37.2, "elapsed_time": "5:26:00", "remaining_time": "9:10:15"} +{"current_steps": 1507, "total_steps": 4048, "loss": 0.35358861088752747, "lr": 1.4848294705682737e-05, "epoch": 0.7448412208081058, "percentage": 37.23, "elapsed_time": "5:26:12", "remaining_time": "9:10:01"} +{"current_steps": 1508, "total_steps": 4048, "loss": 0.299206018447876, "lr": 1.4841147017207376e-05, "epoch": 0.74533547510194, "percentage": 37.25, "elapsed_time": "5:26:24", "remaining_time": "9:09:47"} +{"current_steps": 1509, "total_steps": 4048, "loss": 0.32004314661026, "lr": 1.4833996096851432e-05, "epoch": 0.7458297293957741, "percentage": 37.28, "elapsed_time": "5:26:37", "remaining_time": "9:09:33"} +{"current_steps": 1510, "total_steps": 4048, "loss": 0.32800590991973877, "lr": 1.4826841949388767e-05, 
"epoch": 0.7463239836896083, "percentage": 37.3, "elapsed_time": "5:26:50", "remaining_time": "9:09:20"} +{"current_steps": 1511, "total_steps": 4048, "loss": 0.2916460335254669, "lr": 1.4819684579595382e-05, "epoch": 0.7468182379834425, "percentage": 37.33, "elapsed_time": "5:27:02", "remaining_time": "9:09:07"} +{"current_steps": 1512, "total_steps": 4048, "loss": 0.3276118338108063, "lr": 1.4812523992249437e-05, "epoch": 0.7473124922772767, "percentage": 37.35, "elapsed_time": "5:27:15", "remaining_time": "9:08:53"} +{"current_steps": 1513, "total_steps": 4048, "loss": 0.34718069434165955, "lr": 1.4805360192131234e-05, "epoch": 0.7478067465711108, "percentage": 37.38, "elapsed_time": "5:27:28", "remaining_time": "9:08:41"} +{"current_steps": 1514, "total_steps": 4048, "loss": 0.2810167372226715, "lr": 1.4798193184023233e-05, "epoch": 0.748301000864945, "percentage": 37.4, "elapsed_time": "5:27:41", "remaining_time": "9:08:27"} +{"current_steps": 1515, "total_steps": 4048, "loss": 0.3542296886444092, "lr": 1.4791022972710017e-05, "epoch": 0.7487952551587792, "percentage": 37.43, "elapsed_time": "5:27:53", "remaining_time": "9:08:13"} +{"current_steps": 1516, "total_steps": 4048, "loss": 0.27578431367874146, "lr": 1.4783849562978319e-05, "epoch": 0.7492895094526134, "percentage": 37.45, "elapsed_time": "5:28:06", "remaining_time": "9:08:00"} +{"current_steps": 1517, "total_steps": 4048, "loss": 0.32235798239707947, "lr": 1.4776672959617006e-05, "epoch": 0.7497837637464475, "percentage": 37.48, "elapsed_time": "5:28:19", "remaining_time": "9:07:46"} +{"current_steps": 1518, "total_steps": 4048, "loss": 0.30588477849960327, "lr": 1.4769493167417079e-05, "epoch": 0.7502780180402817, "percentage": 37.5, "elapsed_time": "5:28:31", "remaining_time": "9:07:32"} +{"current_steps": 1519, "total_steps": 4048, "loss": 0.31242361664772034, "lr": 1.4762310191171657e-05, "epoch": 0.7507722723341159, "percentage": 37.52, "elapsed_time": "5:28:44", "remaining_time": "9:07:19"} 
+{"current_steps": 1520, "total_steps": 4048, "loss": 0.3679526150226593, "lr": 1.4755124035675995e-05, "epoch": 0.7512665266279501, "percentage": 37.55, "elapsed_time": "5:28:56", "remaining_time": "9:07:05"} +{"current_steps": 1521, "total_steps": 4048, "loss": 0.28588515520095825, "lr": 1.4747934705727473e-05, "epoch": 0.7517607809217842, "percentage": 37.57, "elapsed_time": "5:29:10", "remaining_time": "9:06:53"} +{"current_steps": 1522, "total_steps": 4048, "loss": 0.29861775040626526, "lr": 1.4740742206125582e-05, "epoch": 0.7522550352156184, "percentage": 37.6, "elapsed_time": "5:29:22", "remaining_time": "9:06:39"} +{"current_steps": 1523, "total_steps": 4048, "loss": 0.31373754143714905, "lr": 1.4733546541671928e-05, "epoch": 0.7527492895094526, "percentage": 37.62, "elapsed_time": "5:29:35", "remaining_time": "9:06:25"} +{"current_steps": 1524, "total_steps": 4048, "loss": 0.3127061128616333, "lr": 1.472634771717024e-05, "epoch": 0.7532435438032868, "percentage": 37.65, "elapsed_time": "5:29:48", "remaining_time": "9:06:13"} +{"current_steps": 1525, "total_steps": 4048, "loss": 0.33681541681289673, "lr": 1.4719145737426346e-05, "epoch": 0.7537377980971209, "percentage": 37.67, "elapsed_time": "5:30:01", "remaining_time": "9:05:59"} +{"current_steps": 1526, "total_steps": 4048, "loss": 0.30266639590263367, "lr": 1.4711940607248182e-05, "epoch": 0.7542320523909551, "percentage": 37.7, "elapsed_time": "5:30:14", "remaining_time": "9:05:46"} +{"current_steps": 1527, "total_steps": 4048, "loss": 0.2988300323486328, "lr": 1.47047323314458e-05, "epoch": 0.7547263066847893, "percentage": 37.72, "elapsed_time": "5:30:26", "remaining_time": "9:05:33"} +{"current_steps": 1528, "total_steps": 4048, "loss": 0.32679620385169983, "lr": 1.4697520914831334e-05, "epoch": 0.7552205609786236, "percentage": 37.75, "elapsed_time": "5:30:40", "remaining_time": "9:05:20"} +{"current_steps": 1529, "total_steps": 4048, "loss": 0.2935605049133301, "lr": 1.4690306362219024e-05, 
"epoch": 0.7557148152724577, "percentage": 37.77, "elapsed_time": "5:30:53", "remaining_time": "9:05:07"} +{"current_steps": 1530, "total_steps": 4048, "loss": 0.303417831659317, "lr": 1.4683088678425204e-05, "epoch": 0.7562090695662919, "percentage": 37.8, "elapsed_time": "5:31:06", "remaining_time": "9:04:54"} +{"current_steps": 1531, "total_steps": 4048, "loss": 0.30822527408599854, "lr": 1.4675867868268295e-05, "epoch": 0.7567033238601261, "percentage": 37.82, "elapsed_time": "5:31:18", "remaining_time": "9:04:41"} +{"current_steps": 1532, "total_steps": 4048, "loss": 0.3104674220085144, "lr": 1.4668643936568807e-05, "epoch": 0.7571975781539602, "percentage": 37.85, "elapsed_time": "5:31:31", "remaining_time": "9:04:28"} +{"current_steps": 1533, "total_steps": 4048, "loss": 0.27899307012557983, "lr": 1.4661416888149333e-05, "epoch": 0.7576918324477944, "percentage": 37.87, "elapsed_time": "5:31:44", "remaining_time": "9:04:14"} +{"current_steps": 1534, "total_steps": 4048, "loss": 0.3285380005836487, "lr": 1.465418672783455e-05, "epoch": 0.7581860867416286, "percentage": 37.9, "elapsed_time": "5:31:56", "remaining_time": "9:04:00"} +{"current_steps": 1535, "total_steps": 4048, "loss": 0.32028889656066895, "lr": 1.4646953460451205e-05, "epoch": 0.7586803410354628, "percentage": 37.92, "elapsed_time": "5:32:10", "remaining_time": "9:03:48"} +{"current_steps": 1536, "total_steps": 4048, "loss": 0.29870709776878357, "lr": 1.4639717090828127e-05, "epoch": 0.7591745953292969, "percentage": 37.94, "elapsed_time": "5:32:22", "remaining_time": "9:03:34"} +{"current_steps": 1537, "total_steps": 4048, "loss": 0.3556699752807617, "lr": 1.4632477623796216e-05, "epoch": 0.7596688496231311, "percentage": 37.97, "elapsed_time": "5:32:35", "remaining_time": "9:03:21"} +{"current_steps": 1538, "total_steps": 4048, "loss": 0.3433789014816284, "lr": 1.462523506418843e-05, "epoch": 0.7601631039169653, "percentage": 37.99, "elapsed_time": "5:32:48", "remaining_time": "9:03:07"} 
+{"current_steps": 1539, "total_steps": 4048, "loss": 0.3146114945411682, "lr": 1.4617989416839802e-05, "epoch": 0.7606573582107995, "percentage": 38.02, "elapsed_time": "5:33:01", "remaining_time": "9:02:55"} +{"current_steps": 1540, "total_steps": 4048, "loss": 0.29029202461242676, "lr": 1.4610740686587424e-05, "epoch": 0.7611516125046336, "percentage": 38.04, "elapsed_time": "5:33:13", "remaining_time": "9:02:41"} +{"current_steps": 1541, "total_steps": 4048, "loss": 0.2976688742637634, "lr": 1.4603488878270442e-05, "epoch": 0.7616458667984678, "percentage": 38.07, "elapsed_time": "5:33:27", "remaining_time": "9:02:29"} +{"current_steps": 1542, "total_steps": 4048, "loss": 0.28604352474212646, "lr": 1.459623399673006e-05, "epoch": 0.762140121092302, "percentage": 38.09, "elapsed_time": "5:33:40", "remaining_time": "9:02:15"} +{"current_steps": 1543, "total_steps": 4048, "loss": 0.34977301955223083, "lr": 1.4588976046809536e-05, "epoch": 0.7626343753861362, "percentage": 38.12, "elapsed_time": "5:33:53", "remaining_time": "9:02:03"} +{"current_steps": 1544, "total_steps": 4048, "loss": 0.31592974066734314, "lr": 1.458171503335417e-05, "epoch": 0.7631286296799703, "percentage": 38.14, "elapsed_time": "5:34:05", "remaining_time": "9:01:49"} +{"current_steps": 1545, "total_steps": 4048, "loss": 0.31539830565452576, "lr": 1.4574450961211312e-05, "epoch": 0.7636228839738045, "percentage": 38.17, "elapsed_time": "5:34:18", "remaining_time": "9:01:36"} +{"current_steps": 1546, "total_steps": 4048, "loss": 0.3100752532482147, "lr": 1.4567183835230355e-05, "epoch": 0.7641171382676387, "percentage": 38.19, "elapsed_time": "5:34:31", "remaining_time": "9:01:22"} +{"current_steps": 1547, "total_steps": 4048, "loss": 0.31005364656448364, "lr": 1.4559913660262726e-05, "epoch": 0.7646113925614729, "percentage": 38.22, "elapsed_time": "5:34:43", "remaining_time": "9:01:08"} +{"current_steps": 1548, "total_steps": 4048, "loss": 0.3050577640533447, "lr": 1.4552640441161889e-05, 
"epoch": 0.765105646855307, "percentage": 38.24, "elapsed_time": "5:34:56", "remaining_time": "9:00:54"} +{"current_steps": 1549, "total_steps": 4048, "loss": 0.294721394777298, "lr": 1.4545364182783343e-05, "epoch": 0.7655999011491412, "percentage": 38.27, "elapsed_time": "5:35:08", "remaining_time": "9:00:41"} +{"current_steps": 1550, "total_steps": 4048, "loss": 0.2974075376987457, "lr": 1.4538084889984616e-05, "epoch": 0.7660941554429754, "percentage": 38.29, "elapsed_time": "5:35:20", "remaining_time": "9:00:27"} +{"current_steps": 1551, "total_steps": 4048, "loss": 0.3247089385986328, "lr": 1.4530802567625259e-05, "epoch": 0.7665884097368096, "percentage": 38.32, "elapsed_time": "5:35:33", "remaining_time": "9:00:13"} +{"current_steps": 1552, "total_steps": 4048, "loss": 0.3219151198863983, "lr": 1.4523517220566843e-05, "epoch": 0.7670826640306437, "percentage": 38.34, "elapsed_time": "5:35:46", "remaining_time": "8:59:59"} +{"current_steps": 1553, "total_steps": 4048, "loss": 0.30580246448516846, "lr": 1.4516228853672962e-05, "epoch": 0.7675769183244779, "percentage": 38.36, "elapsed_time": "5:35:59", "remaining_time": "8:59:46"} +{"current_steps": 1554, "total_steps": 4048, "loss": 0.2983207702636719, "lr": 1.4508937471809233e-05, "epoch": 0.7680711726183121, "percentage": 38.39, "elapsed_time": "5:36:12", "remaining_time": "8:59:34"} +{"current_steps": 1555, "total_steps": 4048, "loss": 0.3429039418697357, "lr": 1.4501643079843266e-05, "epoch": 0.7685654269121464, "percentage": 38.41, "elapsed_time": "5:36:25", "remaining_time": "8:59:22"} +{"current_steps": 1556, "total_steps": 4048, "loss": 0.3055192530155182, "lr": 1.4494345682644704e-05, "epoch": 0.7690596812059804, "percentage": 38.44, "elapsed_time": "5:36:38", "remaining_time": "8:59:09"} +{"current_steps": 1557, "total_steps": 4048, "loss": 0.2964102327823639, "lr": 1.4487045285085178e-05, "epoch": 0.7695539354998147, "percentage": 38.46, "elapsed_time": "5:36:51", "remaining_time": "8:58:55"} 
+{"current_steps": 1558, "total_steps": 4048, "loss": 0.3088444471359253, "lr": 1.4479741892038335e-05, "epoch": 0.7700481897936489, "percentage": 38.49, "elapsed_time": "5:37:04", "remaining_time": "8:58:42"} +{"current_steps": 1559, "total_steps": 4048, "loss": 0.28697890043258667, "lr": 1.4472435508379808e-05, "epoch": 0.770542444087483, "percentage": 38.51, "elapsed_time": "5:37:17", "remaining_time": "8:58:29"} +{"current_steps": 1560, "total_steps": 4048, "loss": 0.3664681315422058, "lr": 1.4465126138987242e-05, "epoch": 0.7710366983813172, "percentage": 38.54, "elapsed_time": "5:37:30", "remaining_time": "8:58:16"} +{"current_steps": 1561, "total_steps": 4048, "loss": 0.3282932937145233, "lr": 1.4457813788740263e-05, "epoch": 0.7715309526751514, "percentage": 38.56, "elapsed_time": "5:37:42", "remaining_time": "8:58:02"} +{"current_steps": 1562, "total_steps": 4048, "loss": 0.27597576379776, "lr": 1.4450498462520495e-05, "epoch": 0.7720252069689856, "percentage": 38.59, "elapsed_time": "5:37:54", "remaining_time": "8:57:48"} +{"current_steps": 1563, "total_steps": 4048, "loss": 0.3553946614265442, "lr": 1.4443180165211541e-05, "epoch": 0.7725194612628197, "percentage": 38.61, "elapsed_time": "5:38:07", "remaining_time": "8:57:34"} +{"current_steps": 1564, "total_steps": 4048, "loss": 0.36224859952926636, "lr": 1.4435858901698995e-05, "epoch": 0.7730137155566539, "percentage": 38.64, "elapsed_time": "5:38:19", "remaining_time": "8:57:21"} +{"current_steps": 1565, "total_steps": 4048, "loss": 0.2940914034843445, "lr": 1.4428534676870427e-05, "epoch": 0.7735079698504881, "percentage": 38.66, "elapsed_time": "5:38:32", "remaining_time": "8:57:08"} +{"current_steps": 1566, "total_steps": 4048, "loss": 0.2717741131782532, "lr": 1.4421207495615385e-05, "epoch": 0.7740022241443223, "percentage": 38.69, "elapsed_time": "5:38:45", "remaining_time": "8:56:54"} +{"current_steps": 1567, "total_steps": 4048, "loss": 0.32340431213378906, "lr": 1.441387736282539e-05, 
"epoch": 0.7744964784381564, "percentage": 38.71, "elapsed_time": "5:38:58", "remaining_time": "8:56:41"} +{"current_steps": 1568, "total_steps": 4048, "loss": 0.3080120086669922, "lr": 1.4406544283393935e-05, "epoch": 0.7749907327319906, "percentage": 38.74, "elapsed_time": "5:39:11", "remaining_time": "8:56:28"} +{"current_steps": 1569, "total_steps": 4048, "loss": 0.3118380308151245, "lr": 1.4399208262216475e-05, "epoch": 0.7754849870258248, "percentage": 38.76, "elapsed_time": "5:39:24", "remaining_time": "8:56:16"} +{"current_steps": 1570, "total_steps": 4048, "loss": 0.3086084723472595, "lr": 1.439186930419044e-05, "epoch": 0.775979241319659, "percentage": 38.78, "elapsed_time": "5:39:37", "remaining_time": "8:56:02"} +{"current_steps": 1571, "total_steps": 4048, "loss": 0.3233364522457123, "lr": 1.438452741421521e-05, "epoch": 0.7764734956134931, "percentage": 38.81, "elapsed_time": "5:39:50", "remaining_time": "8:55:50"} +{"current_steps": 1572, "total_steps": 4048, "loss": 0.29029640555381775, "lr": 1.4377182597192124e-05, "epoch": 0.7769677499073273, "percentage": 38.83, "elapsed_time": "5:40:03", "remaining_time": "8:55:37"} +{"current_steps": 1573, "total_steps": 4048, "loss": 0.2888006567955017, "lr": 1.4369834858024476e-05, "epoch": 0.7774620042011615, "percentage": 38.86, "elapsed_time": "5:40:17", "remaining_time": "8:55:24"} +{"current_steps": 1574, "total_steps": 4048, "loss": 0.3260151743888855, "lr": 1.4362484201617519e-05, "epoch": 0.7779562584949957, "percentage": 38.88, "elapsed_time": "5:40:29", "remaining_time": "8:55:11"} +{"current_steps": 1575, "total_steps": 4048, "loss": 0.333207905292511, "lr": 1.4355130632878439e-05, "epoch": 0.7784505127888298, "percentage": 38.91, "elapsed_time": "5:40:43", "remaining_time": "8:54:59"} +{"current_steps": 1576, "total_steps": 4048, "loss": 0.2577935457229614, "lr": 1.4347774156716375e-05, "epoch": 0.778944767082664, "percentage": 38.93, "elapsed_time": "5:40:56", "remaining_time": "8:54:45"} 
+{"current_steps": 1577, "total_steps": 4048, "loss": 0.29645979404449463, "lr": 1.434041477804241e-05, "epoch": 0.7794390213764982, "percentage": 38.96, "elapsed_time": "5:41:09", "remaining_time": "8:54:33"} +{"current_steps": 1578, "total_steps": 4048, "loss": 0.2973156273365021, "lr": 1.433305250176955e-05, "epoch": 0.7799332756703324, "percentage": 38.98, "elapsed_time": "5:41:22", "remaining_time": "8:54:20"} +{"current_steps": 1579, "total_steps": 4048, "loss": 0.29159975051879883, "lr": 1.4325687332812754e-05, "epoch": 0.7804275299641665, "percentage": 39.01, "elapsed_time": "5:41:35", "remaining_time": "8:54:07"} +{"current_steps": 1580, "total_steps": 4048, "loss": 0.29718664288520813, "lr": 1.4318319276088902e-05, "epoch": 0.7809217842580007, "percentage": 39.03, "elapsed_time": "5:41:48", "remaining_time": "8:53:54"} +{"current_steps": 1581, "total_steps": 4048, "loss": 0.3262369632720947, "lr": 1.4310948336516803e-05, "epoch": 0.781416038551835, "percentage": 39.06, "elapsed_time": "5:42:02", "remaining_time": "8:53:43"} +{"current_steps": 1582, "total_steps": 4048, "loss": 0.36491623520851135, "lr": 1.4303574519017187e-05, "epoch": 0.781910292845669, "percentage": 39.08, "elapsed_time": "5:42:15", "remaining_time": "8:53:29"} +{"current_steps": 1583, "total_steps": 4048, "loss": 0.3558582365512848, "lr": 1.4296197828512716e-05, "epoch": 0.7824045471395032, "percentage": 39.11, "elapsed_time": "5:42:27", "remaining_time": "8:53:15"} +{"current_steps": 1584, "total_steps": 4048, "loss": 0.2745930552482605, "lr": 1.428881826992796e-05, "epoch": 0.7828988014333375, "percentage": 39.13, "elapsed_time": "5:42:40", "remaining_time": "8:53:02"} +{"current_steps": 1585, "total_steps": 4048, "loss": 0.3239384889602661, "lr": 1.4281435848189404e-05, "epoch": 0.7833930557271717, "percentage": 39.16, "elapsed_time": "5:42:52", "remaining_time": "8:52:48"} +{"current_steps": 1586, "total_steps": 4048, "loss": 0.2708761692047119, "lr": 1.4274050568225452e-05, 
"epoch": 0.7838873100210058, "percentage": 39.18, "elapsed_time": "5:43:05", "remaining_time": "8:52:35"} +{"current_steps": 1587, "total_steps": 4048, "loss": 0.3633013963699341, "lr": 1.4266662434966412e-05, "epoch": 0.78438156431484, "percentage": 39.2, "elapsed_time": "5:43:17", "remaining_time": "8:52:21"} +{"current_steps": 1588, "total_steps": 4048, "loss": 0.36411651968955994, "lr": 1.425927145334449e-05, "epoch": 0.7848758186086742, "percentage": 39.23, "elapsed_time": "5:43:30", "remaining_time": "8:52:08"} +{"current_steps": 1589, "total_steps": 4048, "loss": 0.3120966851711273, "lr": 1.4251877628293804e-05, "epoch": 0.7853700729025084, "percentage": 39.25, "elapsed_time": "5:43:43", "remaining_time": "8:51:55"} +{"current_steps": 1590, "total_steps": 4048, "loss": 0.32788634300231934, "lr": 1.4244480964750365e-05, "epoch": 0.7858643271963425, "percentage": 39.28, "elapsed_time": "5:43:56", "remaining_time": "8:51:42"} +{"current_steps": 1591, "total_steps": 4048, "loss": 0.2919159233570099, "lr": 1.423708146765208e-05, "epoch": 0.7863585814901767, "percentage": 39.3, "elapsed_time": "5:44:09", "remaining_time": "8:51:28"} +{"current_steps": 1592, "total_steps": 4048, "loss": 0.3135683834552765, "lr": 1.4229679141938749e-05, "epoch": 0.7868528357840109, "percentage": 39.33, "elapsed_time": "5:44:22", "remaining_time": "8:51:16"} +{"current_steps": 1593, "total_steps": 4048, "loss": 0.351981520652771, "lr": 1.4222273992552058e-05, "epoch": 0.7873470900778451, "percentage": 39.35, "elapsed_time": "5:44:35", "remaining_time": "8:51:02"} +{"current_steps": 1594, "total_steps": 4048, "loss": 0.3615785837173462, "lr": 1.4214866024435576e-05, "epoch": 0.7878413443716792, "percentage": 39.38, "elapsed_time": "5:44:47", "remaining_time": "8:50:49"} +{"current_steps": 1595, "total_steps": 4048, "loss": 0.29399484395980835, "lr": 1.420745524253476e-05, "epoch": 0.7883355986655134, "percentage": 39.4, "elapsed_time": "5:45:00", "remaining_time": "8:50:36"} 
+{"current_steps": 1596, "total_steps": 4048, "loss": 0.30501872301101685, "lr": 1.420004165179694e-05, "epoch": 0.7888298529593476, "percentage": 39.43, "elapsed_time": "5:45:13", "remaining_time": "8:50:22"} +{"current_steps": 1597, "total_steps": 4048, "loss": 0.33745667338371277, "lr": 1.4192625257171331e-05, "epoch": 0.7893241072531818, "percentage": 39.45, "elapsed_time": "5:45:25", "remaining_time": "8:50:08"} +{"current_steps": 1598, "total_steps": 4048, "loss": 0.2675662934780121, "lr": 1.4185206063609e-05, "epoch": 0.7898183615470159, "percentage": 39.48, "elapsed_time": "5:45:37", "remaining_time": "8:49:54"} +{"current_steps": 1599, "total_steps": 4048, "loss": 0.295659601688385, "lr": 1.41777840760629e-05, "epoch": 0.7903126158408501, "percentage": 39.5, "elapsed_time": "5:45:50", "remaining_time": "8:49:41"} +{"current_steps": 1600, "total_steps": 4048, "loss": 0.3164275586605072, "lr": 1.4170359299487848e-05, "epoch": 0.7908068701346843, "percentage": 39.53, "elapsed_time": "5:46:03", "remaining_time": "8:49:27"} +{"current_steps": 1601, "total_steps": 4048, "loss": 0.3039100766181946, "lr": 1.416293173884051e-05, "epoch": 0.7913011244285185, "percentage": 39.55, "elapsed_time": "5:46:20", "remaining_time": "8:49:21"} +{"current_steps": 1602, "total_steps": 4048, "loss": 0.2994040846824646, "lr": 1.4155501399079427e-05, "epoch": 0.7917953787223526, "percentage": 39.58, "elapsed_time": "5:46:33", "remaining_time": "8:49:08"} +{"current_steps": 1603, "total_steps": 4048, "loss": 0.3129369616508484, "lr": 1.4148068285164984e-05, "epoch": 0.7922896330161868, "percentage": 39.6, "elapsed_time": "5:46:46", "remaining_time": "8:48:56"} +{"current_steps": 1604, "total_steps": 4048, "loss": 0.3223167657852173, "lr": 1.4140632402059424e-05, "epoch": 0.792783887310021, "percentage": 39.62, "elapsed_time": "5:46:59", "remaining_time": "8:48:42"} +{"current_steps": 1605, "total_steps": 4048, "loss": 0.2734811305999756, "lr": 1.4133193754726834e-05, "epoch": 
0.7932781416038552, "percentage": 39.65, "elapsed_time": "5:47:12", "remaining_time": "8:48:29"} +{"current_steps": 1606, "total_steps": 4048, "loss": 0.27474087476730347, "lr": 1.4125752348133148e-05, "epoch": 0.7937723958976893, "percentage": 39.67, "elapsed_time": "5:47:25", "remaining_time": "8:48:16"} +{"current_steps": 1607, "total_steps": 4048, "loss": 0.2619907557964325, "lr": 1.4118308187246145e-05, "epoch": 0.7942666501915235, "percentage": 39.7, "elapsed_time": "5:47:39", "remaining_time": "8:48:04"} +{"current_steps": 1608, "total_steps": 4048, "loss": 0.3176937699317932, "lr": 1.411086127703544e-05, "epoch": 0.7947609044853577, "percentage": 39.72, "elapsed_time": "5:47:52", "remaining_time": "8:47:52"} +{"current_steps": 1609, "total_steps": 4048, "loss": 0.28044235706329346, "lr": 1.4103411622472483e-05, "epoch": 0.7952551587791918, "percentage": 39.75, "elapsed_time": "5:48:06", "remaining_time": "8:47:40"} +{"current_steps": 1610, "total_steps": 4048, "loss": 0.27778196334838867, "lr": 1.409595922853056e-05, "epoch": 0.795749413073026, "percentage": 39.77, "elapsed_time": "5:48:20", "remaining_time": "8:47:28"} +{"current_steps": 1611, "total_steps": 4048, "loss": 0.3168628513813019, "lr": 1.4088504100184777e-05, "epoch": 0.7962436673668603, "percentage": 39.8, "elapsed_time": "5:48:33", "remaining_time": "8:47:15"} +{"current_steps": 1612, "total_steps": 4048, "loss": 0.30454084277153015, "lr": 1.4081046242412075e-05, "epoch": 0.7967379216606945, "percentage": 39.82, "elapsed_time": "5:48:46", "remaining_time": "8:47:03"} +{"current_steps": 1613, "total_steps": 4048, "loss": 0.34019169211387634, "lr": 1.4073585660191214e-05, "epoch": 0.7972321759545286, "percentage": 39.85, "elapsed_time": "5:48:59", "remaining_time": "8:46:50"} +{"current_steps": 1614, "total_steps": 4048, "loss": 0.3044774830341339, "lr": 1.4066122358502772e-05, "epoch": 0.7977264302483628, "percentage": 39.87, "elapsed_time": "5:49:12", "remaining_time": "8:46:37"} 
+{"current_steps": 1615, "total_steps": 4048, "loss": 0.3181847333908081, "lr": 1.4058656342329136e-05, "epoch": 0.798220684542197, "percentage": 39.9, "elapsed_time": "5:49:25", "remaining_time": "8:46:24"} +{"current_steps": 1616, "total_steps": 4048, "loss": 0.3400845229625702, "lr": 1.405118761665452e-05, "epoch": 0.7987149388360312, "percentage": 39.92, "elapsed_time": "5:49:38", "remaining_time": "8:46:11"} +{"current_steps": 1617, "total_steps": 4048, "loss": 0.2845221161842346, "lr": 1.4043716186464935e-05, "epoch": 0.7992091931298653, "percentage": 39.95, "elapsed_time": "5:49:51", "remaining_time": "8:45:58"} +{"current_steps": 1618, "total_steps": 4048, "loss": 0.27315276861190796, "lr": 1.4036242056748202e-05, "epoch": 0.7997034474236995, "percentage": 39.97, "elapsed_time": "5:50:04", "remaining_time": "8:45:46"} +{"current_steps": 1619, "total_steps": 4048, "loss": 0.3388780951499939, "lr": 1.4028765232493942e-05, "epoch": 0.8001977017175337, "percentage": 40.0, "elapsed_time": "5:50:17", "remaining_time": "8:45:33"} +{"current_steps": 1620, "total_steps": 4048, "loss": 0.338635116815567, "lr": 1.4021285718693581e-05, "epoch": 0.8006919560113679, "percentage": 40.02, "elapsed_time": "5:50:31", "remaining_time": "8:45:21"} +{"current_steps": 1621, "total_steps": 4048, "loss": 0.26962924003601074, "lr": 1.4013803520340328e-05, "epoch": 0.801186210305202, "percentage": 40.04, "elapsed_time": "5:50:44", "remaining_time": "8:45:08"} +{"current_steps": 1622, "total_steps": 4048, "loss": 0.32106393575668335, "lr": 1.4006318642429194e-05, "epoch": 0.8016804645990362, "percentage": 40.07, "elapsed_time": "5:50:57", "remaining_time": "8:44:55"} +{"current_steps": 1623, "total_steps": 4048, "loss": 0.33063358068466187, "lr": 1.399883108995698e-05, "epoch": 0.8021747188928704, "percentage": 40.09, "elapsed_time": "5:51:11", "remaining_time": "8:44:44"} +{"current_steps": 1624, "total_steps": 4048, "loss": 0.31906163692474365, "lr": 1.3991340867922266e-05, 
"epoch": 0.8026689731867046, "percentage": 40.12, "elapsed_time": "5:51:25", "remaining_time": "8:44:31"} +{"current_steps": 1625, "total_steps": 4048, "loss": 0.2601381242275238, "lr": 1.3983847981325415e-05, "epoch": 0.8031632274805387, "percentage": 40.14, "elapsed_time": "5:51:39", "remaining_time": "8:44:20"} +{"current_steps": 1626, "total_steps": 4048, "loss": 0.3342537581920624, "lr": 1.3976352435168577e-05, "epoch": 0.8036574817743729, "percentage": 40.17, "elapsed_time": "5:51:52", "remaining_time": "8:44:07"} +{"current_steps": 1627, "total_steps": 4048, "loss": 0.3372059166431427, "lr": 1.3968854234455669e-05, "epoch": 0.8041517360682071, "percentage": 40.19, "elapsed_time": "5:52:06", "remaining_time": "8:43:56"} +{"current_steps": 1628, "total_steps": 4048, "loss": 0.31026744842529297, "lr": 1.3961353384192377e-05, "epoch": 0.8046459903620413, "percentage": 40.22, "elapsed_time": "5:52:19", "remaining_time": "8:43:44"} +{"current_steps": 1629, "total_steps": 4048, "loss": 0.2867652177810669, "lr": 1.3953849889386173e-05, "epoch": 0.8051402446558754, "percentage": 40.24, "elapsed_time": "5:52:33", "remaining_time": "8:43:32"} +{"current_steps": 1630, "total_steps": 4048, "loss": 0.29169392585754395, "lr": 1.3946343755046274e-05, "epoch": 0.8056344989497096, "percentage": 40.27, "elapsed_time": "5:52:46", "remaining_time": "8:43:19"} +{"current_steps": 1631, "total_steps": 4048, "loss": 0.2976510524749756, "lr": 1.393883498618367e-05, "epoch": 0.8061287532435438, "percentage": 40.29, "elapsed_time": "5:53:00", "remaining_time": "8:43:07"} +{"current_steps": 1632, "total_steps": 4048, "loss": 0.2900371551513672, "lr": 1.3931323587811107e-05, "epoch": 0.806623007537378, "percentage": 40.32, "elapsed_time": "5:53:13", "remaining_time": "8:42:55"} +{"current_steps": 1633, "total_steps": 4048, "loss": 0.31660354137420654, "lr": 1.3923809564943093e-05, "epoch": 0.8071172618312121, "percentage": 40.34, "elapsed_time": "5:53:27", "remaining_time": "8:42:43"} 
+{"current_steps": 1634, "total_steps": 4048, "loss": 0.3099827468395233, "lr": 1.3916292922595875e-05, "epoch": 0.8076115161250463, "percentage": 40.37, "elapsed_time": "5:53:40", "remaining_time": "8:42:30"} +{"current_steps": 1635, "total_steps": 4048, "loss": 0.34322571754455566, "lr": 1.3908773665787459e-05, "epoch": 0.8081057704188805, "percentage": 40.39, "elapsed_time": "5:53:54", "remaining_time": "8:42:18"} +{"current_steps": 1636, "total_steps": 4048, "loss": 0.2780989408493042, "lr": 1.3901251799537592e-05, "epoch": 0.8086000247127146, "percentage": 40.42, "elapsed_time": "5:54:07", "remaining_time": "8:42:06"} +{"current_steps": 1637, "total_steps": 4048, "loss": 0.31049463152885437, "lr": 1.389372732886777e-05, "epoch": 0.8090942790065488, "percentage": 40.44, "elapsed_time": "5:54:21", "remaining_time": "8:41:54"} +{"current_steps": 1638, "total_steps": 4048, "loss": 0.29925107955932617, "lr": 1.3886200258801213e-05, "epoch": 0.809588533300383, "percentage": 40.46, "elapsed_time": "5:54:35", "remaining_time": "8:41:43"} +{"current_steps": 1639, "total_steps": 4048, "loss": 0.31893983483314514, "lr": 1.3878670594362893e-05, "epoch": 0.8100827875942173, "percentage": 40.49, "elapsed_time": "5:54:49", "remaining_time": "8:41:30"} +{"current_steps": 1640, "total_steps": 4048, "loss": 0.31307080388069153, "lr": 1.3871138340579502e-05, "epoch": 0.8105770418880514, "percentage": 40.51, "elapsed_time": "5:55:03", "remaining_time": "8:41:19"} +{"current_steps": 1641, "total_steps": 4048, "loss": 0.28198909759521484, "lr": 1.3863603502479465e-05, "epoch": 0.8110712961818856, "percentage": 40.54, "elapsed_time": "5:55:16", "remaining_time": "8:41:06"} +{"current_steps": 1642, "total_steps": 4048, "loss": 0.28937461972236633, "lr": 1.3856066085092936e-05, "epoch": 0.8115655504757198, "percentage": 40.56, "elapsed_time": "5:55:30", "remaining_time": "8:40:54"} +{"current_steps": 1643, "total_steps": 4048, "loss": 0.32332292199134827, "lr": 1.3848526093451789e-05, 
"epoch": 0.812059804769554, "percentage": 40.59, "elapsed_time": "5:55:43", "remaining_time": "8:40:42"} +{"current_steps": 1644, "total_steps": 4048, "loss": 0.3059847056865692, "lr": 1.3840983532589606e-05, "epoch": 0.8125540590633881, "percentage": 40.61, "elapsed_time": "5:55:57", "remaining_time": "8:40:30"} +{"current_steps": 1645, "total_steps": 4048, "loss": 0.2939583957195282, "lr": 1.3833438407541698e-05, "epoch": 0.8130483133572223, "percentage": 40.64, "elapsed_time": "5:56:10", "remaining_time": "8:40:17"} +{"current_steps": 1646, "total_steps": 4048, "loss": 0.3293933868408203, "lr": 1.3825890723345082e-05, "epoch": 0.8135425676510565, "percentage": 40.66, "elapsed_time": "5:56:24", "remaining_time": "8:40:05"} +{"current_steps": 1647, "total_steps": 4048, "loss": 0.33373600244522095, "lr": 1.3818340485038488e-05, "epoch": 0.8140368219448907, "percentage": 40.69, "elapsed_time": "5:56:37", "remaining_time": "8:39:53"} +{"current_steps": 1648, "total_steps": 4048, "loss": 0.2716716527938843, "lr": 1.3810787697662337e-05, "epoch": 0.8145310762387248, "percentage": 40.71, "elapsed_time": "5:56:51", "remaining_time": "8:39:41"} +{"current_steps": 1649, "total_steps": 4048, "loss": 0.26109835505485535, "lr": 1.3803232366258774e-05, "epoch": 0.815025330532559, "percentage": 40.74, "elapsed_time": "5:57:04", "remaining_time": "8:39:28"} +{"current_steps": 1650, "total_steps": 4048, "loss": 0.3161536753177643, "lr": 1.3795674495871627e-05, "epoch": 0.8155195848263932, "percentage": 40.76, "elapsed_time": "5:57:18", "remaining_time": "8:39:16"} +{"current_steps": 1651, "total_steps": 4048, "loss": 0.3078432083129883, "lr": 1.3788114091546414e-05, "epoch": 0.8160138391202274, "percentage": 40.79, "elapsed_time": "5:57:31", "remaining_time": "8:39:03"} +{"current_steps": 1652, "total_steps": 4048, "loss": 0.31023627519607544, "lr": 1.3780551158330364e-05, "epoch": 0.8165080934140615, "percentage": 40.81, "elapsed_time": "5:57:45", "remaining_time": "8:38:52"} 
+{"current_steps": 1653, "total_steps": 4048, "loss": 0.3438849151134491, "lr": 1.3772985701272374e-05, "epoch": 0.8170023477078957, "percentage": 40.83, "elapsed_time": "5:57:58", "remaining_time": "8:38:39"} +{"current_steps": 1654, "total_steps": 4048, "loss": 0.31897789239883423, "lr": 1.376541772542304e-05, "epoch": 0.8174966020017299, "percentage": 40.86, "elapsed_time": "5:58:11", "remaining_time": "8:38:26"} +{"current_steps": 1655, "total_steps": 4048, "loss": 0.3101171553134918, "lr": 1.3757847235834636e-05, "epoch": 0.8179908562955641, "percentage": 40.88, "elapsed_time": "5:58:25", "remaining_time": "8:38:14"} +{"current_steps": 1656, "total_steps": 4048, "loss": 0.28926995396614075, "lr": 1.375027423756111e-05, "epoch": 0.8184851105893982, "percentage": 40.91, "elapsed_time": "5:58:38", "remaining_time": "8:38:02"} +{"current_steps": 1657, "total_steps": 4048, "loss": 0.322610080242157, "lr": 1.3742698735658087e-05, "epoch": 0.8189793648832324, "percentage": 40.93, "elapsed_time": "5:58:52", "remaining_time": "8:37:50"} +{"current_steps": 1658, "total_steps": 4048, "loss": 0.27430039644241333, "lr": 1.3735120735182865e-05, "epoch": 0.8194736191770666, "percentage": 40.96, "elapsed_time": "5:59:05", "remaining_time": "8:37:37"} +{"current_steps": 1659, "total_steps": 4048, "loss": 0.3091571629047394, "lr": 1.3727540241194408e-05, "epoch": 0.8199678734709008, "percentage": 40.98, "elapsed_time": "5:59:18", "remaining_time": "8:37:25"} +{"current_steps": 1660, "total_steps": 4048, "loss": 0.3039378523826599, "lr": 1.3719957258753347e-05, "epoch": 0.8204621277647349, "percentage": 41.01, "elapsed_time": "5:59:31", "remaining_time": "8:37:12"} +{"current_steps": 1661, "total_steps": 4048, "loss": 0.29711851477622986, "lr": 1.371237179292197e-05, "epoch": 0.8209563820585691, "percentage": 41.03, "elapsed_time": "5:59:45", "remaining_time": "8:37:00"} +{"current_steps": 1662, "total_steps": 4048, "loss": 0.32411956787109375, "lr": 1.370478384876423e-05, 
"epoch": 0.8214506363524033, "percentage": 41.06, "elapsed_time": "5:59:58", "remaining_time": "8:36:47"} +{"current_steps": 1663, "total_steps": 4048, "loss": 0.2981719672679901, "lr": 1.3697193431345725e-05, "epoch": 0.8219448906462374, "percentage": 41.08, "elapsed_time": "6:00:12", "remaining_time": "8:36:35"} +{"current_steps": 1664, "total_steps": 4048, "loss": 0.32756730914115906, "lr": 1.3689600545733713e-05, "epoch": 0.8224391449400716, "percentage": 41.11, "elapsed_time": "6:00:25", "remaining_time": "8:36:22"} +{"current_steps": 1665, "total_steps": 4048, "loss": 0.3910979628562927, "lr": 1.3682005196997094e-05, "epoch": 0.8229333992339058, "percentage": 41.13, "elapsed_time": "6:00:38", "remaining_time": "8:36:10"} +{"current_steps": 1666, "total_steps": 4048, "loss": 0.31716856360435486, "lr": 1.3674407390206417e-05, "epoch": 0.82342765352774, "percentage": 41.16, "elapsed_time": "6:00:53", "remaining_time": "8:35:59"} +{"current_steps": 1667, "total_steps": 4048, "loss": 0.31816208362579346, "lr": 1.3666807130433865e-05, "epoch": 0.8239219078215742, "percentage": 41.18, "elapsed_time": "6:01:06", "remaining_time": "8:35:46"} +{"current_steps": 1668, "total_steps": 4048, "loss": 0.3008955121040344, "lr": 1.3659204422753265e-05, "epoch": 0.8244161621154084, "percentage": 41.21, "elapsed_time": "6:01:20", "remaining_time": "8:35:34"} +{"current_steps": 1669, "total_steps": 4048, "loss": 0.2957409918308258, "lr": 1.3651599272240078e-05, "epoch": 0.8249104164092426, "percentage": 41.23, "elapsed_time": "6:01:33", "remaining_time": "8:35:22"} +{"current_steps": 1670, "total_steps": 4048, "loss": 0.33019471168518066, "lr": 1.364399168397139e-05, "epoch": 0.8254046707030768, "percentage": 41.25, "elapsed_time": "6:01:47", "remaining_time": "8:35:10"} +{"current_steps": 1671, "total_steps": 4048, "loss": 0.3532376289367676, "lr": 1.3636381663025917e-05, "epoch": 0.8258989249969109, "percentage": 41.28, "elapsed_time": "6:02:00", "remaining_time": "8:34:57"} 
+{"current_steps": 1672, "total_steps": 4048, "loss": 0.2980180084705353, "lr": 1.362876921448401e-05, "epoch": 0.8263931792907451, "percentage": 41.3, "elapsed_time": "6:02:14", "remaining_time": "8:34:45"} +{"current_steps": 1673, "total_steps": 4048, "loss": 0.27932479977607727, "lr": 1.362115434342762e-05, "epoch": 0.8268874335845793, "percentage": 41.33, "elapsed_time": "6:02:27", "remaining_time": "8:34:33"} +{"current_steps": 1674, "total_steps": 4048, "loss": 0.2783966064453125, "lr": 1.3613537054940331e-05, "epoch": 0.8273816878784135, "percentage": 41.35, "elapsed_time": "6:02:41", "remaining_time": "8:34:21"} +{"current_steps": 1675, "total_steps": 4048, "loss": 0.2957308888435364, "lr": 1.3605917354107336e-05, "epoch": 0.8278759421722476, "percentage": 41.38, "elapsed_time": "6:02:54", "remaining_time": "8:34:08"} +{"current_steps": 1676, "total_steps": 4048, "loss": 0.31640201807022095, "lr": 1.3598295246015439e-05, "epoch": 0.8283701964660818, "percentage": 41.4, "elapsed_time": "6:03:08", "remaining_time": "8:33:56"} +{"current_steps": 1677, "total_steps": 4048, "loss": 0.2969709634780884, "lr": 1.3590670735753047e-05, "epoch": 0.828864450759916, "percentage": 41.43, "elapsed_time": "6:03:21", "remaining_time": "8:33:44"} +{"current_steps": 1678, "total_steps": 4048, "loss": 0.34167301654815674, "lr": 1.3583043828410177e-05, "epoch": 0.8293587050537502, "percentage": 41.45, "elapsed_time": "6:03:34", "remaining_time": "8:33:31"} +{"current_steps": 1679, "total_steps": 4048, "loss": 0.28540804982185364, "lr": 1.3575414529078443e-05, "epoch": 0.8298529593475843, "percentage": 41.48, "elapsed_time": "6:03:48", "remaining_time": "8:33:18"} +{"current_steps": 1680, "total_steps": 4048, "loss": 0.2962091565132141, "lr": 1.3567782842851054e-05, "epoch": 0.8303472136414185, "percentage": 41.5, "elapsed_time": "6:04:01", "remaining_time": "8:33:05"} +{"current_steps": 1681, "total_steps": 4048, "loss": 0.3650284707546234, "lr": 1.3560148774822816e-05, 
"epoch": 0.8308414679352527, "percentage": 41.53, "elapsed_time": "6:04:14", "remaining_time": "8:32:53"} +{"current_steps": 1682, "total_steps": 4048, "loss": 0.3134267330169678, "lr": 1.3552512330090126e-05, "epoch": 0.8313357222290869, "percentage": 41.55, "elapsed_time": "6:04:27", "remaining_time": "8:32:40"} +{"current_steps": 1683, "total_steps": 4048, "loss": 0.3020439147949219, "lr": 1.3544873513750967e-05, "epoch": 0.831829976522921, "percentage": 41.58, "elapsed_time": "6:04:41", "remaining_time": "8:32:27"} +{"current_steps": 1684, "total_steps": 4048, "loss": 0.25083282589912415, "lr": 1.3537232330904895e-05, "epoch": 0.8323242308167552, "percentage": 41.6, "elapsed_time": "6:04:54", "remaining_time": "8:32:15"} +{"current_steps": 1685, "total_steps": 4048, "loss": 0.33875352144241333, "lr": 1.3529588786653063e-05, "epoch": 0.8328184851105894, "percentage": 41.63, "elapsed_time": "6:05:07", "remaining_time": "8:32:02"} +{"current_steps": 1686, "total_steps": 4048, "loss": 0.2717735171318054, "lr": 1.3521942886098186e-05, "epoch": 0.8333127394044236, "percentage": 41.65, "elapsed_time": "6:05:20", "remaining_time": "8:31:49"} +{"current_steps": 1687, "total_steps": 4048, "loss": 0.271842896938324, "lr": 1.3514294634344562e-05, "epoch": 0.8338069936982577, "percentage": 41.67, "elapsed_time": "6:05:34", "remaining_time": "8:31:37"} +{"current_steps": 1688, "total_steps": 4048, "loss": 0.29420506954193115, "lr": 1.3506644036498054e-05, "epoch": 0.8343012479920919, "percentage": 41.7, "elapsed_time": "6:05:47", "remaining_time": "8:31:25"} +{"current_steps": 1689, "total_steps": 4048, "loss": 0.3336431682109833, "lr": 1.349899109766609e-05, "epoch": 0.8347955022859261, "percentage": 41.72, "elapsed_time": "6:06:01", "remaining_time": "8:31:13"} +{"current_steps": 1690, "total_steps": 4048, "loss": 0.2848295569419861, "lr": 1.3491335822957665e-05, "epoch": 0.8352897565797602, "percentage": 41.75, "elapsed_time": "6:06:15", "remaining_time": "8:31:01"} 
+{"current_steps": 1691, "total_steps": 4048, "loss": 0.3164542019367218, "lr": 1.3483678217483327e-05, "epoch": 0.8357840108735944, "percentage": 41.77, "elapsed_time": "6:06:29", "remaining_time": "8:30:49"} +{"current_steps": 1692, "total_steps": 4048, "loss": 0.3030688762664795, "lr": 1.3476018286355189e-05, "epoch": 0.8362782651674286, "percentage": 41.8, "elapsed_time": "6:06:42", "remaining_time": "8:30:37"} +{"current_steps": 1693, "total_steps": 4048, "loss": 0.30218198895454407, "lr": 1.3468356034686912e-05, "epoch": 0.8367725194612629, "percentage": 41.82, "elapsed_time": "6:06:55", "remaining_time": "8:30:24"} +{"current_steps": 1694, "total_steps": 4048, "loss": 0.3327499032020569, "lr": 1.3460691467593697e-05, "epoch": 0.837266773755097, "percentage": 41.85, "elapsed_time": "6:07:09", "remaining_time": "8:30:13"} +{"current_steps": 1695, "total_steps": 4048, "loss": 0.29298892617225647, "lr": 1.3453024590192307e-05, "epoch": 0.8377610280489312, "percentage": 41.87, "elapsed_time": "6:07:22", "remaining_time": "8:30:00"} +{"current_steps": 1696, "total_steps": 4048, "loss": 0.3096858859062195, "lr": 1.344535540760104e-05, "epoch": 0.8382552823427654, "percentage": 41.9, "elapsed_time": "6:07:36", "remaining_time": "8:29:48"} +{"current_steps": 1697, "total_steps": 4048, "loss": 0.30680233240127563, "lr": 1.3437683924939731e-05, "epoch": 0.8387495366365996, "percentage": 41.92, "elapsed_time": "6:07:49", "remaining_time": "8:29:35"} +{"current_steps": 1698, "total_steps": 4048, "loss": 0.3139989972114563, "lr": 1.3430010147329752e-05, "epoch": 0.8392437909304337, "percentage": 41.95, "elapsed_time": "6:08:03", "remaining_time": "8:29:23"} +{"current_steps": 1699, "total_steps": 4048, "loss": 0.30418652296066284, "lr": 1.3422334079894008e-05, "epoch": 0.8397380452242679, "percentage": 41.97, "elapsed_time": "6:08:16", "remaining_time": "8:29:09"} +{"current_steps": 1700, "total_steps": 4048, "loss": 0.31245100498199463, "lr": 1.3414655727756931e-05, 
"epoch": 0.8402322995181021, "percentage": 42.0, "elapsed_time": "6:08:29", "remaining_time": "8:28:57"} +{"current_steps": 1701, "total_steps": 4048, "loss": 0.3381880223751068, "lr": 1.3406975096044477e-05, "epoch": 0.8407265538119363, "percentage": 42.02, "elapsed_time": "6:08:49", "remaining_time": "8:28:53"} +{"current_steps": 1702, "total_steps": 4048, "loss": 0.3359968960285187, "lr": 1.3399292189884135e-05, "epoch": 0.8412208081057704, "percentage": 42.05, "elapsed_time": "6:09:02", "remaining_time": "8:28:40"} +{"current_steps": 1703, "total_steps": 4048, "loss": 0.3320350646972656, "lr": 1.3391607014404891e-05, "epoch": 0.8417150623996046, "percentage": 42.07, "elapsed_time": "6:09:15", "remaining_time": "8:28:27"} +{"current_steps": 1704, "total_steps": 4048, "loss": 0.32830795645713806, "lr": 1.3383919574737267e-05, "epoch": 0.8422093166934388, "percentage": 42.09, "elapsed_time": "6:09:28", "remaining_time": "8:28:14"} +{"current_steps": 1705, "total_steps": 4048, "loss": 0.255840927362442, "lr": 1.3376229876013285e-05, "epoch": 0.842703570987273, "percentage": 42.12, "elapsed_time": "6:09:40", "remaining_time": "8:28:00"} +{"current_steps": 1706, "total_steps": 4048, "loss": 0.3110755681991577, "lr": 1.3368537923366476e-05, "epoch": 0.8431978252811071, "percentage": 42.14, "elapsed_time": "6:09:52", "remaining_time": "8:27:46"} +{"current_steps": 1707, "total_steps": 4048, "loss": 0.28063881397247314, "lr": 1.336084372193188e-05, "epoch": 0.8436920795749413, "percentage": 42.17, "elapsed_time": "6:10:05", "remaining_time": "8:27:33"} +{"current_steps": 1708, "total_steps": 4048, "loss": 0.31297358870506287, "lr": 1.3353147276846042e-05, "epoch": 0.8441863338687755, "percentage": 42.19, "elapsed_time": "6:10:17", "remaining_time": "8:27:18"} +{"current_steps": 1709, "total_steps": 4048, "loss": 0.30750149488449097, "lr": 1.3345448593246986e-05, "epoch": 0.8446805881626097, "percentage": 42.22, "elapsed_time": "6:10:30", "remaining_time": "8:27:06"} 
+{"current_steps": 1710, "total_steps": 4048, "loss": 0.2665224075317383, "lr": 1.333774767627425e-05, "epoch": 0.8451748424564438, "percentage": 42.24, "elapsed_time": "6:10:43", "remaining_time": "8:26:52"} +{"current_steps": 1711, "total_steps": 4048, "loss": 0.28920280933380127, "lr": 1.3330044531068858e-05, "epoch": 0.845669096750278, "percentage": 42.27, "elapsed_time": "6:10:56", "remaining_time": "8:26:39"} +{"current_steps": 1712, "total_steps": 4048, "loss": 0.2678643465042114, "lr": 1.332233916277332e-05, "epoch": 0.8461633510441122, "percentage": 42.29, "elapsed_time": "6:11:08", "remaining_time": "8:26:25"} +{"current_steps": 1713, "total_steps": 4048, "loss": 0.33682242035865784, "lr": 1.3314631576531623e-05, "epoch": 0.8466576053379464, "percentage": 42.32, "elapsed_time": "6:11:21", "remaining_time": "8:26:12"} +{"current_steps": 1714, "total_steps": 4048, "loss": 0.36704546213150024, "lr": 1.330692177748925e-05, "epoch": 0.8471518596317805, "percentage": 42.34, "elapsed_time": "6:11:34", "remaining_time": "8:25:58"} +{"current_steps": 1715, "total_steps": 4048, "loss": 0.3183630108833313, "lr": 1.3299209770793144e-05, "epoch": 0.8476461139256147, "percentage": 42.37, "elapsed_time": "6:11:47", "remaining_time": "8:25:45"} +{"current_steps": 1716, "total_steps": 4048, "loss": 0.27138596773147583, "lr": 1.3291495561591736e-05, "epoch": 0.8481403682194489, "percentage": 42.39, "elapsed_time": "6:11:59", "remaining_time": "8:25:32"} +{"current_steps": 1717, "total_steps": 4048, "loss": 0.30252328515052795, "lr": 1.3283779155034925e-05, "epoch": 0.848634622513283, "percentage": 42.42, "elapsed_time": "6:12:12", "remaining_time": "8:25:18"} +{"current_steps": 1718, "total_steps": 4048, "loss": 0.29494598507881165, "lr": 1.3276060556274067e-05, "epoch": 0.8491288768071172, "percentage": 42.44, "elapsed_time": "6:12:25", "remaining_time": "8:25:05"} +{"current_steps": 1719, "total_steps": 4048, "loss": 0.2822422981262207, "lr": 1.3268339770461988e-05, 
"epoch": 0.8496231311009514, "percentage": 42.47, "elapsed_time": "6:12:38", "remaining_time": "8:24:52"} +{"current_steps": 1720, "total_steps": 4048, "loss": 0.3348005712032318, "lr": 1.3260616802752979e-05, "epoch": 0.8501173853947857, "percentage": 42.49, "elapsed_time": "6:12:51", "remaining_time": "8:24:39"} +{"current_steps": 1721, "total_steps": 4048, "loss": 0.3146229088306427, "lr": 1.3252891658302782e-05, "epoch": 0.8506116396886197, "percentage": 42.51, "elapsed_time": "6:13:04", "remaining_time": "8:24:26"} +{"current_steps": 1722, "total_steps": 4048, "loss": 0.34189414978027344, "lr": 1.3245164342268592e-05, "epoch": 0.851105893982454, "percentage": 42.54, "elapsed_time": "6:13:17", "remaining_time": "8:24:13"} +{"current_steps": 1723, "total_steps": 4048, "loss": 0.2967323958873749, "lr": 1.3237434859809055e-05, "epoch": 0.8516001482762882, "percentage": 42.56, "elapsed_time": "6:13:30", "remaining_time": "8:24:00"} +{"current_steps": 1724, "total_steps": 4048, "loss": 0.329689085483551, "lr": 1.3229703216084262e-05, "epoch": 0.8520944025701224, "percentage": 42.59, "elapsed_time": "6:13:43", "remaining_time": "8:23:47"} +{"current_steps": 1725, "total_steps": 4048, "loss": 0.33041107654571533, "lr": 1.3221969416255751e-05, "epoch": 0.8525886568639565, "percentage": 42.61, "elapsed_time": "6:13:56", "remaining_time": "8:23:34"} +{"current_steps": 1726, "total_steps": 4048, "loss": 0.30197203159332275, "lr": 1.321423346548649e-05, "epoch": 0.8530829111577907, "percentage": 42.64, "elapsed_time": "6:14:09", "remaining_time": "8:23:21"} +{"current_steps": 1727, "total_steps": 4048, "loss": 0.29060906171798706, "lr": 1.3206495368940897e-05, "epoch": 0.8535771654516249, "percentage": 42.66, "elapsed_time": "6:14:22", "remaining_time": "8:23:07"} +{"current_steps": 1728, "total_steps": 4048, "loss": 0.3119436502456665, "lr": 1.3198755131784808e-05, "epoch": 0.8540714197454591, "percentage": 42.69, "elapsed_time": "6:14:35", "remaining_time": "8:22:55"} 
+{"current_steps": 1729, "total_steps": 4048, "loss": 0.35256415605545044, "lr": 1.31910127591855e-05, "epoch": 0.8545656740392932, "percentage": 42.71, "elapsed_time": "6:14:48", "remaining_time": "8:22:42"} +{"current_steps": 1730, "total_steps": 4048, "loss": 0.3093785345554352, "lr": 1.3183268256311665e-05, "epoch": 0.8550599283331274, "percentage": 42.74, "elapsed_time": "6:15:01", "remaining_time": "8:22:29"} +{"current_steps": 1731, "total_steps": 4048, "loss": 0.2713086009025574, "lr": 1.317552162833343e-05, "epoch": 0.8555541826269616, "percentage": 42.76, "elapsed_time": "6:15:14", "remaining_time": "8:22:15"} +{"current_steps": 1732, "total_steps": 4048, "loss": 0.3135699927806854, "lr": 1.3167772880422325e-05, "epoch": 0.8560484369207958, "percentage": 42.79, "elapsed_time": "6:15:26", "remaining_time": "8:22:02"} +{"current_steps": 1733, "total_steps": 4048, "loss": 0.3077283501625061, "lr": 1.3160022017751308e-05, "epoch": 0.8565426912146299, "percentage": 42.81, "elapsed_time": "6:15:39", "remaining_time": "8:21:48"} +{"current_steps": 1734, "total_steps": 4048, "loss": 0.2900918424129486, "lr": 1.3152269045494744e-05, "epoch": 0.8570369455084641, "percentage": 42.84, "elapsed_time": "6:15:52", "remaining_time": "8:21:35"} +{"current_steps": 1735, "total_steps": 4048, "loss": 0.30828869342803955, "lr": 1.3144513968828406e-05, "epoch": 0.8575311998022983, "percentage": 42.86, "elapsed_time": "6:16:04", "remaining_time": "8:21:22"} +{"current_steps": 1736, "total_steps": 4048, "loss": 0.32526400685310364, "lr": 1.3136756792929469e-05, "epoch": 0.8580254540961325, "percentage": 42.89, "elapsed_time": "6:16:17", "remaining_time": "8:21:09"} +{"current_steps": 1737, "total_steps": 4048, "loss": 0.35023608803749084, "lr": 1.3128997522976518e-05, "epoch": 0.8585197083899666, "percentage": 42.91, "elapsed_time": "6:16:30", "remaining_time": "8:20:55"} +{"current_steps": 1738, "total_steps": 4048, "loss": 0.27287641167640686, "lr": 1.312123616414953e-05, 
"epoch": 0.8590139626838008, "percentage": 42.93, "elapsed_time": "6:16:43", "remaining_time": "8:20:42"} +{"current_steps": 1739, "total_steps": 4048, "loss": 0.346009761095047, "lr": 1.3113472721629871e-05, "epoch": 0.859508216977635, "percentage": 42.96, "elapsed_time": "6:16:56", "remaining_time": "8:20:29"} +{"current_steps": 1740, "total_steps": 4048, "loss": 0.3297504186630249, "lr": 1.3105707200600312e-05, "epoch": 0.8600024712714691, "percentage": 42.98, "elapsed_time": "6:17:08", "remaining_time": "8:20:15"} +{"current_steps": 1741, "total_steps": 4048, "loss": 0.29835087060928345, "lr": 1.3097939606245005e-05, "epoch": 0.8604967255653033, "percentage": 43.01, "elapsed_time": "6:17:21", "remaining_time": "8:20:02"} +{"current_steps": 1742, "total_steps": 4048, "loss": 0.31466037034988403, "lr": 1.3090169943749475e-05, "epoch": 0.8609909798591375, "percentage": 43.03, "elapsed_time": "6:17:34", "remaining_time": "8:19:49"} +{"current_steps": 1743, "total_steps": 4048, "loss": 0.32722294330596924, "lr": 1.3082398218300646e-05, "epoch": 0.8614852341529717, "percentage": 43.06, "elapsed_time": "6:17:47", "remaining_time": "8:19:36"} +{"current_steps": 1744, "total_steps": 4048, "loss": 0.2603963613510132, "lr": 1.3074624435086809e-05, "epoch": 0.8619794884468058, "percentage": 43.08, "elapsed_time": "6:18:00", "remaining_time": "8:19:23"} +{"current_steps": 1745, "total_steps": 4048, "loss": 0.3100607991218567, "lr": 1.3066848599297633e-05, "epoch": 0.86247374274064, "percentage": 43.11, "elapsed_time": "6:18:14", "remaining_time": "8:19:10"} +{"current_steps": 1746, "total_steps": 4048, "loss": 0.2772334814071655, "lr": 1.3059070716124145e-05, "epoch": 0.8629679970344742, "percentage": 43.13, "elapsed_time": "6:18:26", "remaining_time": "8:18:57"} +{"current_steps": 1747, "total_steps": 4048, "loss": 0.3097267746925354, "lr": 1.305129079075876e-05, "epoch": 0.8634622513283084, "percentage": 43.16, "elapsed_time": "6:18:39", "remaining_time": "8:18:44"} 
+{"current_steps": 1748, "total_steps": 4048, "loss": 0.24734097719192505, "lr": 1.304350882839524e-05, "epoch": 0.8639565056221425, "percentage": 43.18, "elapsed_time": "6:18:53", "remaining_time": "8:18:32"} +{"current_steps": 1749, "total_steps": 4048, "loss": 0.32148587703704834, "lr": 1.3035724834228713e-05, "epoch": 0.8644507599159768, "percentage": 43.21, "elapsed_time": "6:19:06", "remaining_time": "8:18:19"} +{"current_steps": 1750, "total_steps": 4048, "loss": 0.3037404417991638, "lr": 1.3027938813455663e-05, "epoch": 0.864945014209811, "percentage": 43.23, "elapsed_time": "6:19:19", "remaining_time": "8:18:06"} +{"current_steps": 1751, "total_steps": 4048, "loss": 0.30760154128074646, "lr": 1.3020150771273925e-05, "epoch": 0.8654392685036452, "percentage": 43.26, "elapsed_time": "6:19:31", "remaining_time": "8:17:52"} +{"current_steps": 1752, "total_steps": 4048, "loss": 0.3169519305229187, "lr": 1.3012360712882681e-05, "epoch": 0.8659335227974793, "percentage": 43.28, "elapsed_time": "6:19:44", "remaining_time": "8:17:39"} +{"current_steps": 1753, "total_steps": 4048, "loss": 0.32497861981391907, "lr": 1.300456864348247e-05, "epoch": 0.8664277770913135, "percentage": 43.31, "elapsed_time": "6:19:57", "remaining_time": "8:17:26"} +{"current_steps": 1754, "total_steps": 4048, "loss": 0.3318047821521759, "lr": 1.2996774568275163e-05, "epoch": 0.8669220313851477, "percentage": 43.33, "elapsed_time": "6:20:10", "remaining_time": "8:17:13"} +{"current_steps": 1755, "total_steps": 4048, "loss": 0.32553863525390625, "lr": 1.298897849246397e-05, "epoch": 0.8674162856789819, "percentage": 43.35, "elapsed_time": "6:20:23", "remaining_time": "8:16:59"} +{"current_steps": 1756, "total_steps": 4048, "loss": 0.36457520723342896, "lr": 1.2981180421253446e-05, "epoch": 0.867910539972816, "percentage": 43.38, "elapsed_time": "6:20:36", "remaining_time": "8:16:47"} +{"current_steps": 1757, "total_steps": 4048, "loss": 0.3038361668586731, "lr": 1.2973380359849466e-05, 
"epoch": 0.8684047942666502, "percentage": 43.4, "elapsed_time": "6:20:49", "remaining_time": "8:16:33"} +{"current_steps": 1758, "total_steps": 4048, "loss": 0.3219846785068512, "lr": 1.2965578313459246e-05, "epoch": 0.8688990485604844, "percentage": 43.43, "elapsed_time": "6:21:02", "remaining_time": "8:16:21"} +{"current_steps": 1759, "total_steps": 4048, "loss": 0.3180781304836273, "lr": 1.2957774287291311e-05, "epoch": 0.8693933028543186, "percentage": 43.45, "elapsed_time": "6:21:15", "remaining_time": "8:16:07"} +{"current_steps": 1760, "total_steps": 4048, "loss": 0.27302947640419006, "lr": 1.2949968286555527e-05, "epoch": 0.8698875571481527, "percentage": 43.48, "elapsed_time": "6:21:29", "remaining_time": "8:15:55"} +{"current_steps": 1761, "total_steps": 4048, "loss": 0.31756314635276794, "lr": 1.2942160316463066e-05, "epoch": 0.8703818114419869, "percentage": 43.5, "elapsed_time": "6:21:42", "remaining_time": "8:15:42"} +{"current_steps": 1762, "total_steps": 4048, "loss": 0.2921680510044098, "lr": 1.2934350382226412e-05, "epoch": 0.8708760657358211, "percentage": 43.53, "elapsed_time": "6:21:55", "remaining_time": "8:15:30"} +{"current_steps": 1763, "total_steps": 4048, "loss": 0.36426985263824463, "lr": 1.2926538489059373e-05, "epoch": 0.8713703200296553, "percentage": 43.55, "elapsed_time": "6:22:08", "remaining_time": "8:15:17"} +{"current_steps": 1764, "total_steps": 4048, "loss": 0.31873831152915955, "lr": 1.2918724642177054e-05, "epoch": 0.8718645743234894, "percentage": 43.58, "elapsed_time": "6:22:21", "remaining_time": "8:15:04"} +{"current_steps": 1765, "total_steps": 4048, "loss": 0.30952733755111694, "lr": 1.2910908846795867e-05, "epoch": 0.8723588286173236, "percentage": 43.6, "elapsed_time": "6:22:35", "remaining_time": "8:14:52"} +{"current_steps": 1766, "total_steps": 4048, "loss": 0.33339035511016846, "lr": 1.2903091108133523e-05, "epoch": 0.8728530829111578, "percentage": 43.63, "elapsed_time": "6:22:47", "remaining_time": "8:14:38"} 
+{"current_steps": 1767, "total_steps": 4048, "loss": 0.31531351804733276, "lr": 1.2895271431409038e-05, "epoch": 0.8733473372049919, "percentage": 43.65, "elapsed_time": "6:23:01", "remaining_time": "8:14:26"} +{"current_steps": 1768, "total_steps": 4048, "loss": 0.3016526401042938, "lr": 1.2887449821842713e-05, "epoch": 0.8738415914988261, "percentage": 43.68, "elapsed_time": "6:23:13", "remaining_time": "8:14:12"} +{"current_steps": 1769, "total_steps": 4048, "loss": 0.3364630341529846, "lr": 1.2879626284656141e-05, "epoch": 0.8743358457926603, "percentage": 43.7, "elapsed_time": "6:23:27", "remaining_time": "8:14:00"} +{"current_steps": 1770, "total_steps": 4048, "loss": 0.29755398631095886, "lr": 1.287180082507221e-05, "epoch": 0.8748301000864945, "percentage": 43.73, "elapsed_time": "6:23:40", "remaining_time": "8:13:47"} +{"current_steps": 1771, "total_steps": 4048, "loss": 0.2986103892326355, "lr": 1.286397344831508e-05, "epoch": 0.8753243543803286, "percentage": 43.75, "elapsed_time": "6:23:53", "remaining_time": "8:13:34"} +{"current_steps": 1772, "total_steps": 4048, "loss": 0.31291434168815613, "lr": 1.2856144159610197e-05, "epoch": 0.8758186086741628, "percentage": 43.77, "elapsed_time": "6:24:06", "remaining_time": "8:13:21"} +{"current_steps": 1773, "total_steps": 4048, "loss": 0.28285568952560425, "lr": 1.2848312964184283e-05, "epoch": 0.876312862967997, "percentage": 43.8, "elapsed_time": "6:24:19", "remaining_time": "8:13:09"} +{"current_steps": 1774, "total_steps": 4048, "loss": 0.3319891095161438, "lr": 1.2840479867265331e-05, "epoch": 0.8768071172618312, "percentage": 43.82, "elapsed_time": "6:24:32", "remaining_time": "8:12:55"} +{"current_steps": 1775, "total_steps": 4048, "loss": 0.3265117406845093, "lr": 1.2832644874082604e-05, "epoch": 0.8773013715556653, "percentage": 43.85, "elapsed_time": "6:24:46", "remaining_time": "8:12:43"} +{"current_steps": 1776, "total_steps": 4048, "loss": 0.32061511278152466, "lr": 1.2824807989866635e-05, 
"epoch": 0.8777956258494996, "percentage": 43.87, "elapsed_time": "6:24:59", "remaining_time": "8:12:30"} +{"current_steps": 1777, "total_steps": 4048, "loss": 0.34278666973114014, "lr": 1.2816969219849214e-05, "epoch": 0.8782898801433338, "percentage": 43.9, "elapsed_time": "6:25:11", "remaining_time": "8:12:16"} +{"current_steps": 1778, "total_steps": 4048, "loss": 0.28335195779800415, "lr": 1.2809128569263387e-05, "epoch": 0.878784134437168, "percentage": 43.92, "elapsed_time": "6:25:25", "remaining_time": "8:12:04"} +{"current_steps": 1779, "total_steps": 4048, "loss": 0.35037046670913696, "lr": 1.2801286043343468e-05, "epoch": 0.8792783887310021, "percentage": 43.95, "elapsed_time": "6:25:38", "remaining_time": "8:11:52"} +{"current_steps": 1780, "total_steps": 4048, "loss": 0.30058878660202026, "lr": 1.2793441647325012e-05, "epoch": 0.8797726430248363, "percentage": 43.97, "elapsed_time": "6:25:52", "remaining_time": "8:11:39"} +{"current_steps": 1781, "total_steps": 4048, "loss": 0.29526466131210327, "lr": 1.2785595386444824e-05, "epoch": 0.8802668973186705, "percentage": 44.0, "elapsed_time": "6:26:05", "remaining_time": "8:11:26"} +{"current_steps": 1782, "total_steps": 4048, "loss": 0.3194332718849182, "lr": 1.2777747265940956e-05, "epoch": 0.8807611516125047, "percentage": 44.02, "elapsed_time": "6:26:18", "remaining_time": "8:11:14"} +{"current_steps": 1783, "total_steps": 4048, "loss": 0.33527326583862305, "lr": 1.2769897291052709e-05, "epoch": 0.8812554059063388, "percentage": 44.05, "elapsed_time": "6:26:31", "remaining_time": "8:11:01"} +{"current_steps": 1784, "total_steps": 4048, "loss": 0.3277815580368042, "lr": 1.2762045467020601e-05, "epoch": 0.881749660200173, "percentage": 44.07, "elapsed_time": "6:26:45", "remaining_time": "8:10:49"} +{"current_steps": 1785, "total_steps": 4048, "loss": 0.31030380725860596, "lr": 1.2754191799086406e-05, "epoch": 0.8822439144940072, "percentage": 44.1, "elapsed_time": "6:26:58", "remaining_time": "8:10:36"} 
+{"current_steps": 1786, "total_steps": 4048, "loss": 0.34496408700942993, "lr": 1.274633629249312e-05, "epoch": 0.8827381687878414, "percentage": 44.12, "elapsed_time": "6:27:11", "remaining_time": "8:10:23"} +{"current_steps": 1787, "total_steps": 4048, "loss": 0.31008201837539673, "lr": 1.2738478952484964e-05, "epoch": 0.8832324230816755, "percentage": 44.15, "elapsed_time": "6:27:24", "remaining_time": "8:10:09"} +{"current_steps": 1788, "total_steps": 4048, "loss": 0.35956043004989624, "lr": 1.2730619784307388e-05, "epoch": 0.8837266773755097, "percentage": 44.17, "elapsed_time": "6:27:37", "remaining_time": "8:09:56"} +{"current_steps": 1789, "total_steps": 4048, "loss": 0.2944573760032654, "lr": 1.272275879320706e-05, "epoch": 0.8842209316693439, "percentage": 44.19, "elapsed_time": "6:27:50", "remaining_time": "8:09:44"} +{"current_steps": 1790, "total_steps": 4048, "loss": 0.2941366136074066, "lr": 1.2714895984431863e-05, "epoch": 0.8847151859631781, "percentage": 44.22, "elapsed_time": "6:28:04", "remaining_time": "8:09:31"} +{"current_steps": 1791, "total_steps": 4048, "loss": 0.34683144092559814, "lr": 1.2707031363230901e-05, "epoch": 0.8852094402570122, "percentage": 44.24, "elapsed_time": "6:28:16", "remaining_time": "8:09:17"} +{"current_steps": 1792, "total_steps": 4048, "loss": 0.3014514744281769, "lr": 1.2699164934854475e-05, "epoch": 0.8857036945508464, "percentage": 44.27, "elapsed_time": "6:28:29", "remaining_time": "8:09:04"} +{"current_steps": 1793, "total_steps": 4048, "loss": 0.2749955654144287, "lr": 1.2691296704554112e-05, "epoch": 0.8861979488446806, "percentage": 44.29, "elapsed_time": "6:28:41", "remaining_time": "8:08:50"} +{"current_steps": 1794, "total_steps": 4048, "loss": 0.3707960844039917, "lr": 1.2683426677582518e-05, "epoch": 0.8866922031385147, "percentage": 44.32, "elapsed_time": "6:28:54", "remaining_time": "8:08:37"} +{"current_steps": 1795, "total_steps": 4048, "loss": 0.3122541606426239, "lr": 1.2675554859193615e-05, 
"epoch": 0.8871864574323489, "percentage": 44.34, "elapsed_time": "6:29:07", "remaining_time": "8:08:24"} +{"current_steps": 1796, "total_steps": 4048, "loss": 0.3072753846645355, "lr": 1.2667681254642521e-05, "epoch": 0.8876807117261831, "percentage": 44.37, "elapsed_time": "6:29:19", "remaining_time": "8:08:10"} +{"current_steps": 1797, "total_steps": 4048, "loss": 0.27002331614494324, "lr": 1.2659805869185534e-05, "epoch": 0.8881749660200173, "percentage": 44.39, "elapsed_time": "6:29:32", "remaining_time": "8:07:57"} +{"current_steps": 1798, "total_steps": 4048, "loss": 0.2775167226791382, "lr": 1.2651928708080155e-05, "epoch": 0.8886692203138514, "percentage": 44.42, "elapsed_time": "6:29:45", "remaining_time": "8:07:44"} +{"current_steps": 1799, "total_steps": 4048, "loss": 0.30023425817489624, "lr": 1.2644049776585061e-05, "epoch": 0.8891634746076856, "percentage": 44.44, "elapsed_time": "6:29:58", "remaining_time": "8:07:31"} +{"current_steps": 1800, "total_steps": 4048, "loss": 0.29491451382637024, "lr": 1.2636169079960116e-05, "epoch": 0.8896577289015198, "percentage": 44.47, "elapsed_time": "6:30:11", "remaining_time": "8:07:17"} +{"current_steps": 1801, "total_steps": 4048, "loss": 0.3069722652435303, "lr": 1.2628286623466359e-05, "epoch": 0.890151983195354, "percentage": 44.49, "elapsed_time": "6:30:29", "remaining_time": "8:07:11"} +{"current_steps": 1802, "total_steps": 4048, "loss": 0.30594444274902344, "lr": 1.2620402412366006e-05, "epoch": 0.8906462374891881, "percentage": 44.52, "elapsed_time": "6:30:42", "remaining_time": "8:06:58"} +{"current_steps": 1803, "total_steps": 4048, "loss": 0.278346985578537, "lr": 1.2612516451922442e-05, "epoch": 0.8911404917830223, "percentage": 44.54, "elapsed_time": "6:30:55", "remaining_time": "8:06:45"} +{"current_steps": 1804, "total_steps": 4048, "loss": 0.2985970973968506, "lr": 1.2604628747400227e-05, "epoch": 0.8916347460768566, "percentage": 44.57, "elapsed_time": "6:31:08", "remaining_time": "8:06:32"} 
+{"current_steps": 1805, "total_steps": 4048, "loss": 0.31054627895355225, "lr": 1.259673930406507e-05, "epoch": 0.8921290003706908, "percentage": 44.59, "elapsed_time": "6:31:22", "remaining_time": "8:06:20"} +{"current_steps": 1806, "total_steps": 4048, "loss": 0.28903907537460327, "lr": 1.258884812718386e-05, "epoch": 0.8926232546645249, "percentage": 44.61, "elapsed_time": "6:31:35", "remaining_time": "8:06:07"} +{"current_steps": 1807, "total_steps": 4048, "loss": 0.2937915027141571, "lr": 1.258095522202463e-05, "epoch": 0.8931175089583591, "percentage": 44.64, "elapsed_time": "6:31:48", "remaining_time": "8:05:54"} +{"current_steps": 1808, "total_steps": 4048, "loss": 0.3038950562477112, "lr": 1.257306059385657e-05, "epoch": 0.8936117632521933, "percentage": 44.66, "elapsed_time": "6:32:01", "remaining_time": "8:05:42"} +{"current_steps": 1809, "total_steps": 4048, "loss": 0.3081057071685791, "lr": 1.2565164247950023e-05, "epoch": 0.8941060175460275, "percentage": 44.69, "elapsed_time": "6:32:14", "remaining_time": "8:05:28"} +{"current_steps": 1810, "total_steps": 4048, "loss": 0.2608702480792999, "lr": 1.2557266189576478e-05, "epoch": 0.8946002718398616, "percentage": 44.71, "elapsed_time": "6:32:27", "remaining_time": "8:05:15"} +{"current_steps": 1811, "total_steps": 4048, "loss": 0.2829548120498657, "lr": 1.254936642400856e-05, "epoch": 0.8950945261336958, "percentage": 44.74, "elapsed_time": "6:32:40", "remaining_time": "8:05:02"} +{"current_steps": 1812, "total_steps": 4048, "loss": 0.3157985508441925, "lr": 1.2541464956520045e-05, "epoch": 0.89558878042753, "percentage": 44.76, "elapsed_time": "6:32:53", "remaining_time": "8:04:49"} +{"current_steps": 1813, "total_steps": 4048, "loss": 0.2948974370956421, "lr": 1.2533561792385837e-05, "epoch": 0.8960830347213642, "percentage": 44.79, "elapsed_time": "6:33:05", "remaining_time": "8:04:35"} +{"current_steps": 1814, "total_steps": 4048, "loss": 0.3011903166770935, "lr": 1.252565693688198e-05, "epoch": 
0.8965772890151983, "percentage": 44.81, "elapsed_time": "6:33:19", "remaining_time": "8:04:23"} +{"current_steps": 1815, "total_steps": 4048, "loss": 0.3570353388786316, "lr": 1.2517750395285635e-05, "epoch": 0.8970715433090325, "percentage": 44.84, "elapsed_time": "6:33:32", "remaining_time": "8:04:10"} +{"current_steps": 1816, "total_steps": 4048, "loss": 0.30166712403297424, "lr": 1.2509842172875105e-05, "epoch": 0.8975657976028667, "percentage": 44.86, "elapsed_time": "6:33:45", "remaining_time": "8:03:58"} +{"current_steps": 1817, "total_steps": 4048, "loss": 0.3260636329650879, "lr": 1.2501932274929797e-05, "epoch": 0.8980600518967009, "percentage": 44.89, "elapsed_time": "6:33:59", "remaining_time": "8:03:45"} +{"current_steps": 1818, "total_steps": 4048, "loss": 0.31647035479545593, "lr": 1.2494020706730251e-05, "epoch": 0.898554306190535, "percentage": 44.91, "elapsed_time": "6:34:12", "remaining_time": "8:03:32"} +{"current_steps": 1819, "total_steps": 4048, "loss": 0.3059273064136505, "lr": 1.2486107473558118e-05, "epoch": 0.8990485604843692, "percentage": 44.94, "elapsed_time": "6:34:25", "remaining_time": "8:03:19"} +{"current_steps": 1820, "total_steps": 4048, "loss": 0.31050577759742737, "lr": 1.247819258069616e-05, "epoch": 0.8995428147782034, "percentage": 44.96, "elapsed_time": "6:34:38", "remaining_time": "8:03:06"} +{"current_steps": 1821, "total_steps": 4048, "loss": 0.3199779689311981, "lr": 1.2470276033428241e-05, "epoch": 0.9000370690720375, "percentage": 44.99, "elapsed_time": "6:34:51", "remaining_time": "8:02:53"} +{"current_steps": 1822, "total_steps": 4048, "loss": 0.31346091628074646, "lr": 1.2462357837039338e-05, "epoch": 0.9005313233658717, "percentage": 45.01, "elapsed_time": "6:35:04", "remaining_time": "8:02:40"} +{"current_steps": 1823, "total_steps": 4048, "loss": 0.31128326058387756, "lr": 1.245443799681553e-05, "epoch": 0.9010255776597059, "percentage": 45.03, "elapsed_time": "6:35:18", "remaining_time": "8:02:28"} 
+{"current_steps": 1824, "total_steps": 4048, "loss": 0.27540329098701477, "lr": 1.244651651804398e-05, "epoch": 0.9015198319535401, "percentage": 45.06, "elapsed_time": "6:35:31", "remaining_time": "8:02:15"} +{"current_steps": 1825, "total_steps": 4048, "loss": 0.2613363265991211, "lr": 1.243859340601296e-05, "epoch": 0.9020140862473742, "percentage": 45.08, "elapsed_time": "6:35:44", "remaining_time": "8:02:03"} +{"current_steps": 1826, "total_steps": 4048, "loss": 0.30530184507369995, "lr": 1.2430668666011825e-05, "epoch": 0.9025083405412084, "percentage": 45.11, "elapsed_time": "6:35:57", "remaining_time": "8:01:50"} +{"current_steps": 1827, "total_steps": 4048, "loss": 0.3223349153995514, "lr": 1.2422742303331022e-05, "epoch": 0.9030025948350426, "percentage": 45.13, "elapsed_time": "6:36:11", "remaining_time": "8:01:37"} +{"current_steps": 1828, "total_steps": 4048, "loss": 0.32017287611961365, "lr": 1.2414814323262067e-05, "epoch": 0.9034968491288768, "percentage": 45.16, "elapsed_time": "6:36:24", "remaining_time": "8:01:24"} +{"current_steps": 1829, "total_steps": 4048, "loss": 0.2965891361236572, "lr": 1.2406884731097582e-05, "epoch": 0.9039911034227109, "percentage": 45.18, "elapsed_time": "6:36:38", "remaining_time": "8:01:12"} +{"current_steps": 1830, "total_steps": 4048, "loss": 0.3517727851867676, "lr": 1.2398953532131235e-05, "epoch": 0.9044853577165451, "percentage": 45.21, "elapsed_time": "6:36:51", "remaining_time": "8:00:59"} +{"current_steps": 1831, "total_steps": 4048, "loss": 0.26107311248779297, "lr": 1.2391020731657788e-05, "epoch": 0.9049796120103794, "percentage": 45.23, "elapsed_time": "6:37:05", "remaining_time": "8:00:47"} +{"current_steps": 1832, "total_steps": 4048, "loss": 0.31327998638153076, "lr": 1.2383086334973065e-05, "epoch": 0.9054738663042136, "percentage": 45.26, "elapsed_time": "6:37:18", "remaining_time": "8:00:34"} +{"current_steps": 1833, "total_steps": 4048, "loss": 0.2708127498626709, "lr": 1.2375150347373956e-05, 
"epoch": 0.9059681205980477, "percentage": 45.28, "elapsed_time": "6:37:30", "remaining_time": "8:00:21"} +{"current_steps": 1834, "total_steps": 4048, "loss": 0.3264025151729584, "lr": 1.236721277415841e-05, "epoch": 0.9064623748918819, "percentage": 45.31, "elapsed_time": "6:37:43", "remaining_time": "8:00:07"} +{"current_steps": 1835, "total_steps": 4048, "loss": 0.3226723074913025, "lr": 1.2359273620625438e-05, "epoch": 0.9069566291857161, "percentage": 45.33, "elapsed_time": "6:37:56", "remaining_time": "7:59:54"} +{"current_steps": 1836, "total_steps": 4048, "loss": 0.2895771861076355, "lr": 1.2351332892075109e-05, "epoch": 0.9074508834795503, "percentage": 45.36, "elapsed_time": "6:38:09", "remaining_time": "7:59:41"} +{"current_steps": 1837, "total_steps": 4048, "loss": 0.3316076397895813, "lr": 1.234339059380854e-05, "epoch": 0.9079451377733844, "percentage": 45.38, "elapsed_time": "6:38:21", "remaining_time": "7:59:27"} +{"current_steps": 1838, "total_steps": 4048, "loss": 0.29858651757240295, "lr": 1.2335446731127887e-05, "epoch": 0.9084393920672186, "percentage": 45.41, "elapsed_time": "6:38:34", "remaining_time": "7:59:14"} +{"current_steps": 1839, "total_steps": 4048, "loss": 0.31340792775154114, "lr": 1.2327501309336371e-05, "epoch": 0.9089336463610528, "percentage": 45.43, "elapsed_time": "6:38:47", "remaining_time": "7:59:01"} +{"current_steps": 1840, "total_steps": 4048, "loss": 0.27344945073127747, "lr": 1.2319554333738236e-05, "epoch": 0.909427900654887, "percentage": 45.45, "elapsed_time": "6:38:59", "remaining_time": "7:58:47"} +{"current_steps": 1841, "total_steps": 4048, "loss": 0.27349725365638733, "lr": 1.2311605809638766e-05, "epoch": 0.9099221549487211, "percentage": 45.48, "elapsed_time": "6:39:12", "remaining_time": "7:58:34"} +{"current_steps": 1842, "total_steps": 4048, "loss": 0.28933316469192505, "lr": 1.2303655742344292e-05, "epoch": 0.9104164092425553, "percentage": 45.5, "elapsed_time": "6:39:25", "remaining_time": "7:58:21"} 
+{"current_steps": 1843, "total_steps": 4048, "loss": 0.3315466344356537, "lr": 1.2295704137162158e-05, "epoch": 0.9109106635363895, "percentage": 45.53, "elapsed_time": "6:39:37", "remaining_time": "7:58:07"} +{"current_steps": 1844, "total_steps": 4048, "loss": 0.3227408528327942, "lr": 1.2287750999400743e-05, "epoch": 0.9114049178302237, "percentage": 45.55, "elapsed_time": "6:39:50", "remaining_time": "7:57:54"} +{"current_steps": 1845, "total_steps": 4048, "loss": 0.30476877093315125, "lr": 1.2279796334369447e-05, "epoch": 0.9118991721240578, "percentage": 45.58, "elapsed_time": "6:40:02", "remaining_time": "7:57:40"} +{"current_steps": 1846, "total_steps": 4048, "loss": 0.29941046237945557, "lr": 1.2271840147378697e-05, "epoch": 0.912393426417892, "percentage": 45.6, "elapsed_time": "6:40:15", "remaining_time": "7:57:27"} +{"current_steps": 1847, "total_steps": 4048, "loss": 0.26635122299194336, "lr": 1.2263882443739923e-05, "epoch": 0.9128876807117262, "percentage": 45.63, "elapsed_time": "6:40:28", "remaining_time": "7:57:13"} +{"current_steps": 1848, "total_steps": 4048, "loss": 0.32384809851646423, "lr": 1.2255923228765574e-05, "epoch": 0.9133819350055603, "percentage": 45.65, "elapsed_time": "6:40:40", "remaining_time": "7:57:00"} +{"current_steps": 1849, "total_steps": 4048, "loss": 0.2830178141593933, "lr": 1.2247962507769113e-05, "epoch": 0.9138761892993945, "percentage": 45.68, "elapsed_time": "6:40:54", "remaining_time": "7:56:47"} +{"current_steps": 1850, "total_steps": 4048, "loss": 0.32860931754112244, "lr": 1.2240000286065003e-05, "epoch": 0.9143704435932287, "percentage": 45.7, "elapsed_time": "6:41:06", "remaining_time": "7:56:33"} +{"current_steps": 1851, "total_steps": 4048, "loss": 0.2820647954940796, "lr": 1.2232036568968703e-05, "epoch": 0.9148646978870629, "percentage": 45.73, "elapsed_time": "6:41:19", "remaining_time": "7:56:21"} +{"current_steps": 1852, "total_steps": 4048, "loss": 0.3368694484233856, "lr": 1.2224071361796685e-05, 
"epoch": 0.915358952180897, "percentage": 45.75, "elapsed_time": "6:41:32", "remaining_time": "7:56:08"} +{"current_steps": 1853, "total_steps": 4048, "loss": 0.32594096660614014, "lr": 1.2216104669866405e-05, "epoch": 0.9158532064747312, "percentage": 45.78, "elapsed_time": "6:41:46", "remaining_time": "7:55:55"} +{"current_steps": 1854, "total_steps": 4048, "loss": 0.3383556008338928, "lr": 1.2208136498496307e-05, "epoch": 0.9163474607685654, "percentage": 45.8, "elapsed_time": "6:41:58", "remaining_time": "7:55:41"} +{"current_steps": 1855, "total_steps": 4048, "loss": 0.2655363976955414, "lr": 1.2200166853005837e-05, "epoch": 0.9168417150623996, "percentage": 45.83, "elapsed_time": "6:42:12", "remaining_time": "7:55:29"} +{"current_steps": 1856, "total_steps": 4048, "loss": 0.30512773990631104, "lr": 1.2192195738715414e-05, "epoch": 0.9173359693562337, "percentage": 45.85, "elapsed_time": "6:42:24", "remaining_time": "7:55:15"} +{"current_steps": 1857, "total_steps": 4048, "loss": 0.34026995301246643, "lr": 1.2184223160946433e-05, "epoch": 0.917830223650068, "percentage": 45.87, "elapsed_time": "6:42:37", "remaining_time": "7:55:02"} +{"current_steps": 1858, "total_steps": 4048, "loss": 0.29324328899383545, "lr": 1.2176249125021281e-05, "epoch": 0.9183244779439022, "percentage": 45.9, "elapsed_time": "6:42:50", "remaining_time": "7:54:49"} +{"current_steps": 1859, "total_steps": 4048, "loss": 0.3114206790924072, "lr": 1.2168273636263308e-05, "epoch": 0.9188187322377364, "percentage": 45.92, "elapsed_time": "6:43:03", "remaining_time": "7:54:35"} +{"current_steps": 1860, "total_steps": 4048, "loss": 0.2829141914844513, "lr": 1.2160296699996839e-05, "epoch": 0.9193129865315705, "percentage": 45.95, "elapsed_time": "6:43:15", "remaining_time": "7:54:22"} +{"current_steps": 1861, "total_steps": 4048, "loss": 0.2735600769519806, "lr": 1.2152318321547156e-05, "epoch": 0.9198072408254047, "percentage": 45.97, "elapsed_time": "6:43:29", "remaining_time": "7:54:10"} 
+{"current_steps": 1862, "total_steps": 4048, "loss": 0.3160930573940277, "lr": 1.2144338506240519e-05, "epoch": 0.9203014951192389, "percentage": 46.0, "elapsed_time": "6:43:42", "remaining_time": "7:53:57"} +{"current_steps": 1863, "total_steps": 4048, "loss": 0.26677393913269043, "lr": 1.2136357259404128e-05, "epoch": 0.9207957494130731, "percentage": 46.02, "elapsed_time": "6:43:55", "remaining_time": "7:53:44"} +{"current_steps": 1864, "total_steps": 4048, "loss": 0.33033064007759094, "lr": 1.2128374586366159e-05, "epoch": 0.9212900037069072, "percentage": 46.05, "elapsed_time": "6:44:08", "remaining_time": "7:53:31"} +{"current_steps": 1865, "total_steps": 4048, "loss": 0.28271663188934326, "lr": 1.2120390492455727e-05, "epoch": 0.9217842580007414, "percentage": 46.07, "elapsed_time": "6:44:22", "remaining_time": "7:53:19"} +{"current_steps": 1866, "total_steps": 4048, "loss": 0.3116013705730438, "lr": 1.21124049830029e-05, "epoch": 0.9222785122945756, "percentage": 46.1, "elapsed_time": "6:44:35", "remaining_time": "7:53:06"} +{"current_steps": 1867, "total_steps": 4048, "loss": 0.30614158511161804, "lr": 1.2104418063338686e-05, "epoch": 0.9227727665884098, "percentage": 46.12, "elapsed_time": "6:44:47", "remaining_time": "7:52:52"} +{"current_steps": 1868, "total_steps": 4048, "loss": 0.34351983666419983, "lr": 1.2096429738795041e-05, "epoch": 0.9232670208822439, "percentage": 46.15, "elapsed_time": "6:45:01", "remaining_time": "7:52:40"} +{"current_steps": 1869, "total_steps": 4048, "loss": 0.31006965041160583, "lr": 1.2088440014704858e-05, "epoch": 0.9237612751760781, "percentage": 46.17, "elapsed_time": "6:45:14", "remaining_time": "7:52:27"} +{"current_steps": 1870, "total_steps": 4048, "loss": 0.2671147584915161, "lr": 1.2080448896401964e-05, "epoch": 0.9242555294699123, "percentage": 46.2, "elapsed_time": "6:45:27", "remaining_time": "7:52:14"} +{"current_steps": 1871, "total_steps": 4048, "loss": 0.29123416543006897, "lr": 1.207245638922111e-05, 
"epoch": 0.9247497837637465, "percentage": 46.22, "elapsed_time": "6:45:40", "remaining_time": "7:52:01"} +{"current_steps": 1872, "total_steps": 4048, "loss": 0.31838539242744446, "lr": 1.2064462498497984e-05, "epoch": 0.9252440380575806, "percentage": 46.25, "elapsed_time": "6:45:53", "remaining_time": "7:51:48"} +{"current_steps": 1873, "total_steps": 4048, "loss": 0.3158906102180481, "lr": 1.205646722956919e-05, "epoch": 0.9257382923514148, "percentage": 46.27, "elapsed_time": "6:46:06", "remaining_time": "7:51:35"} +{"current_steps": 1874, "total_steps": 4048, "loss": 0.3679552674293518, "lr": 1.2048470587772257e-05, "epoch": 0.926232546645249, "percentage": 46.29, "elapsed_time": "6:46:19", "remaining_time": "7:51:21"} +{"current_steps": 1875, "total_steps": 4048, "loss": 0.2891008257865906, "lr": 1.204047257844563e-05, "epoch": 0.9267268009390831, "percentage": 46.32, "elapsed_time": "6:46:31", "remaining_time": "7:51:08"} +{"current_steps": 1876, "total_steps": 4048, "loss": 0.3207235634326935, "lr": 1.2032473206928663e-05, "epoch": 0.9272210552329173, "percentage": 46.34, "elapsed_time": "6:46:44", "remaining_time": "7:50:55"} +{"current_steps": 1877, "total_steps": 4048, "loss": 0.2710658311843872, "lr": 1.2024472478561624e-05, "epoch": 0.9277153095267515, "percentage": 46.37, "elapsed_time": "6:46:56", "remaining_time": "7:50:41"} +{"current_steps": 1878, "total_steps": 4048, "loss": 0.2554836869239807, "lr": 1.2016470398685685e-05, "epoch": 0.9282095638205857, "percentage": 46.39, "elapsed_time": "6:47:09", "remaining_time": "7:50:28"} +{"current_steps": 1879, "total_steps": 4048, "loss": 0.2822943329811096, "lr": 1.2008466972642921e-05, "epoch": 0.9287038181144198, "percentage": 46.42, "elapsed_time": "6:47:23", "remaining_time": "7:50:16"} +{"current_steps": 1880, "total_steps": 4048, "loss": 0.3447754681110382, "lr": 1.20004622057763e-05, "epoch": 0.929198072408254, "percentage": 46.44, "elapsed_time": "6:47:36", "remaining_time": "7:50:02"} 
+{"current_steps": 1881, "total_steps": 4048, "loss": 0.3009227514266968, "lr": 1.1992456103429694e-05, "epoch": 0.9296923267020882, "percentage": 46.47, "elapsed_time": "6:47:49", "remaining_time": "7:49:50"} +{"current_steps": 1882, "total_steps": 4048, "loss": 0.33154594898223877, "lr": 1.1984448670947863e-05, "epoch": 0.9301865809959224, "percentage": 46.49, "elapsed_time": "6:48:03", "remaining_time": "7:49:37"} +{"current_steps": 1883, "total_steps": 4048, "loss": 0.32905343174934387, "lr": 1.1976439913676457e-05, "epoch": 0.9306808352897565, "percentage": 46.52, "elapsed_time": "6:48:17", "remaining_time": "7:49:25"} +{"current_steps": 1884, "total_steps": 4048, "loss": 0.34757447242736816, "lr": 1.1968429836962e-05, "epoch": 0.9311750895835907, "percentage": 46.54, "elapsed_time": "6:48:30", "remaining_time": "7:49:13"} +{"current_steps": 1885, "total_steps": 4048, "loss": 0.29980987310409546, "lr": 1.1960418446151912e-05, "epoch": 0.931669343877425, "percentage": 46.57, "elapsed_time": "6:48:44", "remaining_time": "7:49:01"} +{"current_steps": 1886, "total_steps": 4048, "loss": 0.3106808662414551, "lr": 1.1952405746594477e-05, "epoch": 0.9321635981712592, "percentage": 46.59, "elapsed_time": "6:48:57", "remaining_time": "7:48:48"} +{"current_steps": 1887, "total_steps": 4048, "loss": 0.3222411572933197, "lr": 1.1944391743638863e-05, "epoch": 0.9326578524650933, "percentage": 46.62, "elapsed_time": "6:49:11", "remaining_time": "7:48:36"} +{"current_steps": 1888, "total_steps": 4048, "loss": 0.3365646302700043, "lr": 1.1936376442635104e-05, "epoch": 0.9331521067589275, "percentage": 46.64, "elapsed_time": "6:49:24", "remaining_time": "7:48:23"} +{"current_steps": 1889, "total_steps": 4048, "loss": 0.32500627636909485, "lr": 1.1928359848934101e-05, "epoch": 0.9336463610527617, "percentage": 46.67, "elapsed_time": "6:49:37", "remaining_time": "7:48:10"} +{"current_steps": 1890, "total_steps": 4048, "loss": 0.31395500898361206, "lr": 1.1920341967887614e-05, 
"epoch": 0.9341406153465959, "percentage": 46.69, "elapsed_time": "6:49:51", "remaining_time": "7:47:58"} +{"current_steps": 1891, "total_steps": 4048, "loss": 0.3060624301433563, "lr": 1.1912322804848268e-05, "epoch": 0.93463486964043, "percentage": 46.71, "elapsed_time": "6:50:04", "remaining_time": "7:47:45"} +{"current_steps": 1892, "total_steps": 4048, "loss": 0.2644454836845398, "lr": 1.190430236516954e-05, "epoch": 0.9351291239342642, "percentage": 46.74, "elapsed_time": "6:50:18", "remaining_time": "7:47:33"} +{"current_steps": 1893, "total_steps": 4048, "loss": 0.33404678106307983, "lr": 1.1896280654205765e-05, "epoch": 0.9356233782280984, "percentage": 46.76, "elapsed_time": "6:50:31", "remaining_time": "7:47:20"} +{"current_steps": 1894, "total_steps": 4048, "loss": 0.28557512164115906, "lr": 1.1888257677312119e-05, "epoch": 0.9361176325219326, "percentage": 46.79, "elapsed_time": "6:50:45", "remaining_time": "7:47:08"} +{"current_steps": 1895, "total_steps": 4048, "loss": 0.3332308530807495, "lr": 1.1880233439844623e-05, "epoch": 0.9366118868157667, "percentage": 46.81, "elapsed_time": "6:50:58", "remaining_time": "7:46:55"} +{"current_steps": 1896, "total_steps": 4048, "loss": 0.3274528384208679, "lr": 1.1872207947160155e-05, "epoch": 0.9371061411096009, "percentage": 46.84, "elapsed_time": "6:51:11", "remaining_time": "7:46:42"} +{"current_steps": 1897, "total_steps": 4048, "loss": 0.297880083322525, "lr": 1.1864181204616404e-05, "epoch": 0.9376003954034351, "percentage": 46.86, "elapsed_time": "6:51:24", "remaining_time": "7:46:29"} +{"current_steps": 1898, "total_steps": 4048, "loss": 0.3404296040534973, "lr": 1.1856153217571924e-05, "epoch": 0.9380946496972693, "percentage": 46.89, "elapsed_time": "6:51:37", "remaining_time": "7:46:17"} +{"current_steps": 1899, "total_steps": 4048, "loss": 0.32343849539756775, "lr": 1.1848123991386073e-05, "epoch": 0.9385889039911034, "percentage": 46.91, "elapsed_time": "6:51:50", "remaining_time": "7:46:04"} 
+{"current_steps": 1900, "total_steps": 4048, "loss": 0.26679158210754395, "lr": 1.1840093531419052e-05, "epoch": 0.9390831582849376, "percentage": 46.94, "elapsed_time": "6:52:04", "remaining_time": "7:45:52"} +{"current_steps": 1901, "total_steps": 4048, "loss": 0.28106996417045593, "lr": 1.1832061843031884e-05, "epoch": 0.9395774125787718, "percentage": 46.96, "elapsed_time": "6:52:24", "remaining_time": "7:45:46"} +{"current_steps": 1902, "total_steps": 4048, "loss": 0.28356847167015076, "lr": 1.1824028931586406e-05, "epoch": 0.9400716668726059, "percentage": 46.99, "elapsed_time": "6:52:38", "remaining_time": "7:45:34"} +{"current_steps": 1903, "total_steps": 4048, "loss": 0.3256348669528961, "lr": 1.1815994802445274e-05, "epoch": 0.9405659211664401, "percentage": 47.01, "elapsed_time": "6:52:51", "remaining_time": "7:45:21"} +{"current_steps": 1904, "total_steps": 4048, "loss": 0.2781906723976135, "lr": 1.1807959460971958e-05, "epoch": 0.9410601754602743, "percentage": 47.04, "elapsed_time": "6:53:04", "remaining_time": "7:45:08"} +{"current_steps": 1905, "total_steps": 4048, "loss": 0.3129916787147522, "lr": 1.1799922912530741e-05, "epoch": 0.9415544297541085, "percentage": 47.06, "elapsed_time": "6:53:17", "remaining_time": "7:44:55"} +{"current_steps": 1906, "total_steps": 4048, "loss": 0.281986266374588, "lr": 1.1791885162486705e-05, "epoch": 0.9420486840479426, "percentage": 47.08, "elapsed_time": "6:53:30", "remaining_time": "7:44:42"} +{"current_steps": 1907, "total_steps": 4048, "loss": 0.33587342500686646, "lr": 1.1783846216205734e-05, "epoch": 0.9425429383417768, "percentage": 47.11, "elapsed_time": "6:53:43", "remaining_time": "7:44:29"} +{"current_steps": 1908, "total_steps": 4048, "loss": 0.27715635299682617, "lr": 1.1775806079054522e-05, "epoch": 0.943037192635611, "percentage": 47.13, "elapsed_time": "6:53:56", "remaining_time": "7:44:16"} +{"current_steps": 1909, "total_steps": 4048, "loss": 0.3190307915210724, "lr": 1.1767764756400541e-05, 
"epoch": 0.9435314469294452, "percentage": 47.16, "elapsed_time": "6:54:09", "remaining_time": "7:44:03"} +{"current_steps": 1910, "total_steps": 4048, "loss": 0.29336807131767273, "lr": 1.175972225361207e-05, "epoch": 0.9440257012232793, "percentage": 47.18, "elapsed_time": "6:54:22", "remaining_time": "7:43:50"} +{"current_steps": 1911, "total_steps": 4048, "loss": 0.3001596927642822, "lr": 1.1751678576058164e-05, "epoch": 0.9445199555171135, "percentage": 47.21, "elapsed_time": "6:54:36", "remaining_time": "7:43:38"} +{"current_steps": 1912, "total_steps": 4048, "loss": 0.26952457427978516, "lr": 1.1743633729108672e-05, "epoch": 0.9450142098109477, "percentage": 47.23, "elapsed_time": "6:54:49", "remaining_time": "7:43:25"} +{"current_steps": 1913, "total_steps": 4048, "loss": 0.3193609118461609, "lr": 1.1735587718134212e-05, "epoch": 0.945508464104782, "percentage": 47.26, "elapsed_time": "6:55:02", "remaining_time": "7:43:12"} +{"current_steps": 1914, "total_steps": 4048, "loss": 0.2810664176940918, "lr": 1.172754054850619e-05, "epoch": 0.946002718398616, "percentage": 47.28, "elapsed_time": "6:55:15", "remaining_time": "7:42:59"} +{"current_steps": 1915, "total_steps": 4048, "loss": 0.28850311040878296, "lr": 1.1719492225596783e-05, "epoch": 0.9464969726924503, "percentage": 47.31, "elapsed_time": "6:55:28", "remaining_time": "7:42:46"} +{"current_steps": 1916, "total_steps": 4048, "loss": 0.32268932461738586, "lr": 1.1711442754778936e-05, "epoch": 0.9469912269862845, "percentage": 47.33, "elapsed_time": "6:55:41", "remaining_time": "7:42:33"} +{"current_steps": 1917, "total_steps": 4048, "loss": 0.3149149715900421, "lr": 1.1703392141426356e-05, "epoch": 0.9474854812801187, "percentage": 47.36, "elapsed_time": "6:55:55", "remaining_time": "7:42:20"} +{"current_steps": 1918, "total_steps": 4048, "loss": 0.2537482678890228, "lr": 1.1695340390913526e-05, "epoch": 0.9479797355739528, "percentage": 47.38, "elapsed_time": "6:56:08", "remaining_time": "7:42:08"} 
+{"current_steps": 1919, "total_steps": 4048, "loss": 0.2611936330795288, "lr": 1.168728750861567e-05, "epoch": 0.948473989867787, "percentage": 47.41, "elapsed_time": "6:56:21", "remaining_time": "7:41:55"} +{"current_steps": 1920, "total_steps": 4048, "loss": 0.263653427362442, "lr": 1.1679233499908781e-05, "epoch": 0.9489682441616212, "percentage": 47.43, "elapsed_time": "6:56:34", "remaining_time": "7:41:42"} +{"current_steps": 1921, "total_steps": 4048, "loss": 0.3122594952583313, "lr": 1.1671178370169604e-05, "epoch": 0.9494624984554554, "percentage": 47.46, "elapsed_time": "6:56:48", "remaining_time": "7:41:29"} +{"current_steps": 1922, "total_steps": 4048, "loss": 0.3101043701171875, "lr": 1.1663122124775626e-05, "epoch": 0.9499567527492895, "percentage": 47.48, "elapsed_time": "6:57:01", "remaining_time": "7:41:17"} +{"current_steps": 1923, "total_steps": 4048, "loss": 0.295572966337204, "lr": 1.1655064769105077e-05, "epoch": 0.9504510070431237, "percentage": 47.5, "elapsed_time": "6:57:14", "remaining_time": "7:41:04"} +{"current_steps": 1924, "total_steps": 4048, "loss": 0.29732125997543335, "lr": 1.1647006308536937e-05, "epoch": 0.9509452613369579, "percentage": 47.53, "elapsed_time": "6:57:28", "remaining_time": "7:40:51"} +{"current_steps": 1925, "total_steps": 4048, "loss": 0.32320737838745117, "lr": 1.1638946748450922e-05, "epoch": 0.951439515630792, "percentage": 47.55, "elapsed_time": "6:57:40", "remaining_time": "7:40:38"} +{"current_steps": 1926, "total_steps": 4048, "loss": 0.3306753933429718, "lr": 1.1630886094227471e-05, "epoch": 0.9519337699246262, "percentage": 47.58, "elapsed_time": "6:57:54", "remaining_time": "7:40:26"} +{"current_steps": 1927, "total_steps": 4048, "loss": 0.2368355095386505, "lr": 1.1622824351247767e-05, "epoch": 0.9524280242184604, "percentage": 47.6, "elapsed_time": "6:58:07", "remaining_time": "7:40:13"} +{"current_steps": 1928, "total_steps": 4048, "loss": 0.28470784425735474, "lr": 1.1614761524893715e-05, "epoch": 
0.9529222785122946, "percentage": 47.63, "elapsed_time": "6:58:21", "remaining_time": "7:40:00"} +{"current_steps": 1929, "total_steps": 4048, "loss": 0.34468895196914673, "lr": 1.160669762054794e-05, "epoch": 0.9534165328061287, "percentage": 47.65, "elapsed_time": "6:58:34", "remaining_time": "7:39:47"} +{"current_steps": 1930, "total_steps": 4048, "loss": 0.30562442541122437, "lr": 1.1598632643593787e-05, "epoch": 0.9539107870999629, "percentage": 47.68, "elapsed_time": "6:58:47", "remaining_time": "7:39:35"} +{"current_steps": 1931, "total_steps": 4048, "loss": 0.2861478924751282, "lr": 1.159056659941533e-05, "epoch": 0.9544050413937971, "percentage": 47.7, "elapsed_time": "6:59:00", "remaining_time": "7:39:22"} +{"current_steps": 1932, "total_steps": 4048, "loss": 0.32385969161987305, "lr": 1.1582499493397332e-05, "epoch": 0.9548992956876313, "percentage": 47.73, "elapsed_time": "6:59:14", "remaining_time": "7:39:09"} +{"current_steps": 1933, "total_steps": 4048, "loss": 0.2935449481010437, "lr": 1.1574431330925287e-05, "epoch": 0.9553935499814654, "percentage": 47.75, "elapsed_time": "6:59:27", "remaining_time": "7:38:56"} +{"current_steps": 1934, "total_steps": 4048, "loss": 0.29380083084106445, "lr": 1.156636211738538e-05, "epoch": 0.9558878042752996, "percentage": 47.78, "elapsed_time": "6:59:40", "remaining_time": "7:38:43"} +{"current_steps": 1935, "total_steps": 4048, "loss": 0.2957204282283783, "lr": 1.1558291858164503e-05, "epoch": 0.9563820585691338, "percentage": 47.8, "elapsed_time": "6:59:53", "remaining_time": "7:38:30"} +{"current_steps": 1936, "total_steps": 4048, "loss": 0.26402851939201355, "lr": 1.1550220558650246e-05, "epoch": 0.956876312862968, "percentage": 47.83, "elapsed_time": "7:00:06", "remaining_time": "7:38:17"} +{"current_steps": 1937, "total_steps": 4048, "loss": 0.29163527488708496, "lr": 1.1542148224230897e-05, "epoch": 0.9573705671568021, "percentage": 47.85, "elapsed_time": "7:00:18", "remaining_time": "7:38:04"} 
+{"current_steps": 1938, "total_steps": 4048, "loss": 0.302470326423645, "lr": 1.1534074860295426e-05, "epoch": 0.9578648214506363, "percentage": 47.88, "elapsed_time": "7:00:31", "remaining_time": "7:37:51"} +{"current_steps": 1939, "total_steps": 4048, "loss": 0.25946593284606934, "lr": 1.15260004722335e-05, "epoch": 0.9583590757444705, "percentage": 47.9, "elapsed_time": "7:00:45", "remaining_time": "7:37:38"} +{"current_steps": 1940, "total_steps": 4048, "loss": 0.2680559456348419, "lr": 1.1517925065435457e-05, "epoch": 0.9588533300383048, "percentage": 47.92, "elapsed_time": "7:00:58", "remaining_time": "7:37:25"} +{"current_steps": 1941, "total_steps": 4048, "loss": 0.2684473991394043, "lr": 1.1509848645292334e-05, "epoch": 0.9593475843321388, "percentage": 47.95, "elapsed_time": "7:01:11", "remaining_time": "7:37:12"} +{"current_steps": 1942, "total_steps": 4048, "loss": 0.2795519232749939, "lr": 1.1501771217195827e-05, "epoch": 0.959841838625973, "percentage": 47.97, "elapsed_time": "7:01:24", "remaining_time": "7:36:59"} +{"current_steps": 1943, "total_steps": 4048, "loss": 0.35209575295448303, "lr": 1.1493692786538313e-05, "epoch": 0.9603360929198073, "percentage": 48.0, "elapsed_time": "7:01:37", "remaining_time": "7:36:46"} +{"current_steps": 1944, "total_steps": 4048, "loss": 0.3058928847312927, "lr": 1.1485613358712839e-05, "epoch": 0.9608303472136415, "percentage": 48.02, "elapsed_time": "7:01:50", "remaining_time": "7:36:33"} +{"current_steps": 1945, "total_steps": 4048, "loss": 0.2889159619808197, "lr": 1.1477532939113112e-05, "epoch": 0.9613246015074756, "percentage": 48.05, "elapsed_time": "7:02:03", "remaining_time": "7:36:21"} +{"current_steps": 1946, "total_steps": 4048, "loss": 0.30782538652420044, "lr": 1.1469451533133506e-05, "epoch": 0.9618188558013098, "percentage": 48.07, "elapsed_time": "7:02:16", "remaining_time": "7:36:08"} +{"current_steps": 1947, "total_steps": 4048, "loss": 0.3091726005077362, "lr": 1.1461369146169052e-05, "epoch": 
0.962313110095144, "percentage": 48.1, "elapsed_time": "7:02:30", "remaining_time": "7:35:55"} +{"current_steps": 1948, "total_steps": 4048, "loss": 0.3287050724029541, "lr": 1.1453285783615438e-05, "epoch": 0.9628073643889782, "percentage": 48.12, "elapsed_time": "7:02:43", "remaining_time": "7:35:42"} +{"current_steps": 1949, "total_steps": 4048, "loss": 0.31267625093460083, "lr": 1.1445201450868998e-05, "epoch": 0.9633016186828123, "percentage": 48.15, "elapsed_time": "7:02:56", "remaining_time": "7:35:29"} +{"current_steps": 1950, "total_steps": 4048, "loss": 0.30775952339172363, "lr": 1.1437116153326719e-05, "epoch": 0.9637958729766465, "percentage": 48.17, "elapsed_time": "7:03:10", "remaining_time": "7:35:17"} +{"current_steps": 1951, "total_steps": 4048, "loss": 0.3825497329235077, "lr": 1.142902989638623e-05, "epoch": 0.9642901272704807, "percentage": 48.2, "elapsed_time": "7:03:23", "remaining_time": "7:35:04"} +{"current_steps": 1952, "total_steps": 4048, "loss": 0.2866062521934509, "lr": 1.1420942685445801e-05, "epoch": 0.9647843815643148, "percentage": 48.22, "elapsed_time": "7:03:36", "remaining_time": "7:34:51"} +{"current_steps": 1953, "total_steps": 4048, "loss": 0.27787062525749207, "lr": 1.1412854525904335e-05, "epoch": 0.965278635858149, "percentage": 48.25, "elapsed_time": "7:03:49", "remaining_time": "7:34:38"} +{"current_steps": 1954, "total_steps": 4048, "loss": 0.302572101354599, "lr": 1.1404765423161381e-05, "epoch": 0.9657728901519832, "percentage": 48.27, "elapsed_time": "7:04:03", "remaining_time": "7:34:26"} +{"current_steps": 1955, "total_steps": 4048, "loss": 0.29608359932899475, "lr": 1.1396675382617097e-05, "epoch": 0.9662671444458174, "percentage": 48.3, "elapsed_time": "7:04:16", "remaining_time": "7:34:13"} +{"current_steps": 1956, "total_steps": 4048, "loss": 0.28057801723480225, "lr": 1.1388584409672285e-05, "epoch": 0.9667613987396515, "percentage": 48.32, "elapsed_time": "7:04:30", "remaining_time": "7:34:00"} 
+{"current_steps": 1957, "total_steps": 4048, "loss": 0.29628869891166687, "lr": 1.1380492509728363e-05, "epoch": 0.9672556530334857, "percentage": 48.34, "elapsed_time": "7:04:42", "remaining_time": "7:33:47"} +{"current_steps": 1958, "total_steps": 4048, "loss": 0.29254984855651855, "lr": 1.1372399688187365e-05, "epoch": 0.9677499073273199, "percentage": 48.37, "elapsed_time": "7:04:56", "remaining_time": "7:33:35"} +{"current_steps": 1959, "total_steps": 4048, "loss": 0.32925280928611755, "lr": 1.1364305950451946e-05, "epoch": 0.9682441616211541, "percentage": 48.39, "elapsed_time": "7:05:09", "remaining_time": "7:33:21"} +{"current_steps": 1960, "total_steps": 4048, "loss": 0.3072258234024048, "lr": 1.1356211301925367e-05, "epoch": 0.9687384159149882, "percentage": 48.42, "elapsed_time": "7:05:22", "remaining_time": "7:33:09"} +{"current_steps": 1961, "total_steps": 4048, "loss": 0.29737845063209534, "lr": 1.1348115748011499e-05, "epoch": 0.9692326702088224, "percentage": 48.44, "elapsed_time": "7:05:36", "remaining_time": "7:32:57"} +{"current_steps": 1962, "total_steps": 4048, "loss": 0.27369949221611023, "lr": 1.1340019294114822e-05, "epoch": 0.9697269245026566, "percentage": 48.47, "elapsed_time": "7:05:49", "remaining_time": "7:32:44"} +{"current_steps": 1963, "total_steps": 4048, "loss": 0.33116602897644043, "lr": 1.1331921945640408e-05, "epoch": 0.9702211787964908, "percentage": 48.49, "elapsed_time": "7:06:02", "remaining_time": "7:32:31"} +{"current_steps": 1964, "total_steps": 4048, "loss": 0.2620438039302826, "lr": 1.1323823707993937e-05, "epoch": 0.9707154330903249, "percentage": 48.52, "elapsed_time": "7:06:15", "remaining_time": "7:32:18"} +{"current_steps": 1965, "total_steps": 4048, "loss": 0.3187680244445801, "lr": 1.1315724586581673e-05, "epoch": 0.9712096873841591, "percentage": 48.54, "elapsed_time": "7:06:30", "remaining_time": "7:32:06"} +{"current_steps": 1966, "total_steps": 4048, "loss": 0.3675233721733093, "lr": 1.1307624586810472e-05, 
"epoch": 0.9717039416779933, "percentage": 48.57, "elapsed_time": "7:06:43", "remaining_time": "7:31:53"} +{"current_steps": 1967, "total_steps": 4048, "loss": 0.31064945459365845, "lr": 1.1299523714087784e-05, "epoch": 0.9721981959718276, "percentage": 48.59, "elapsed_time": "7:06:56", "remaining_time": "7:31:41"} +{"current_steps": 1968, "total_steps": 4048, "loss": 0.2941773235797882, "lr": 1.1291421973821632e-05, "epoch": 0.9726924502656616, "percentage": 48.62, "elapsed_time": "7:07:09", "remaining_time": "7:31:28"} +{"current_steps": 1969, "total_steps": 4048, "loss": 0.3443846106529236, "lr": 1.128331937142062e-05, "epoch": 0.9731867045594959, "percentage": 48.64, "elapsed_time": "7:07:23", "remaining_time": "7:31:16"} +{"current_steps": 1970, "total_steps": 4048, "loss": 0.2815151810646057, "lr": 1.1275215912293933e-05, "epoch": 0.9736809588533301, "percentage": 48.67, "elapsed_time": "7:07:36", "remaining_time": "7:31:03"} +{"current_steps": 1971, "total_steps": 4048, "loss": 0.2886476516723633, "lr": 1.1267111601851327e-05, "epoch": 0.9741752131471643, "percentage": 48.69, "elapsed_time": "7:07:50", "remaining_time": "7:30:51"} +{"current_steps": 1972, "total_steps": 4048, "loss": 0.2692835330963135, "lr": 1.1259006445503116e-05, "epoch": 0.9746694674409984, "percentage": 48.72, "elapsed_time": "7:08:04", "remaining_time": "7:30:38"} +{"current_steps": 1973, "total_steps": 4048, "loss": 0.2748587727546692, "lr": 1.1250900448660192e-05, "epoch": 0.9751637217348326, "percentage": 48.74, "elapsed_time": "7:08:17", "remaining_time": "7:30:26"} +{"current_steps": 1974, "total_steps": 4048, "loss": 0.2963098883628845, "lr": 1.1242793616734002e-05, "epoch": 0.9756579760286668, "percentage": 48.76, "elapsed_time": "7:08:30", "remaining_time": "7:30:13"} +{"current_steps": 1975, "total_steps": 4048, "loss": 0.28353193402290344, "lr": 1.1234685955136552e-05, "epoch": 0.976152230322501, "percentage": 48.79, "elapsed_time": "7:08:43", "remaining_time": "7:30:00"} 
+{"current_steps": 1976, "total_steps": 4048, "loss": 0.3308493494987488, "lr": 1.1226577469280397e-05, "epoch": 0.9766464846163351, "percentage": 48.81, "elapsed_time": "7:08:56", "remaining_time": "7:29:47"} +{"current_steps": 1977, "total_steps": 4048, "loss": 0.26923754811286926, "lr": 1.1218468164578653e-05, "epoch": 0.9771407389101693, "percentage": 48.84, "elapsed_time": "7:09:09", "remaining_time": "7:29:34"} +{"current_steps": 1978, "total_steps": 4048, "loss": 0.2730574905872345, "lr": 1.1210358046444968e-05, "epoch": 0.9776349932040035, "percentage": 48.86, "elapsed_time": "7:09:23", "remaining_time": "7:29:21"} +{"current_steps": 1979, "total_steps": 4048, "loss": 0.26464858651161194, "lr": 1.1202247120293548e-05, "epoch": 0.9781292474978376, "percentage": 48.89, "elapsed_time": "7:09:35", "remaining_time": "7:29:08"} +{"current_steps": 1980, "total_steps": 4048, "loss": 0.30095499753952026, "lr": 1.1194135391539127e-05, "epoch": 0.9786235017916718, "percentage": 48.91, "elapsed_time": "7:09:48", "remaining_time": "7:28:55"} +{"current_steps": 1981, "total_steps": 4048, "loss": 0.3418167233467102, "lr": 1.1186022865596983e-05, "epoch": 0.979117756085506, "percentage": 48.94, "elapsed_time": "7:10:01", "remaining_time": "7:28:42"} +{"current_steps": 1982, "total_steps": 4048, "loss": 0.28735262155532837, "lr": 1.117790954788292e-05, "epoch": 0.9796120103793402, "percentage": 48.96, "elapsed_time": "7:10:15", "remaining_time": "7:28:29"} +{"current_steps": 1983, "total_steps": 4048, "loss": 0.26816800236701965, "lr": 1.116979544381327e-05, "epoch": 0.9801062646731743, "percentage": 48.99, "elapsed_time": "7:10:28", "remaining_time": "7:28:16"} +{"current_steps": 1984, "total_steps": 4048, "loss": 0.31004661321640015, "lr": 1.1161680558804897e-05, "epoch": 0.9806005189670085, "percentage": 49.01, "elapsed_time": "7:10:41", "remaining_time": "7:28:03"} +{"current_steps": 1985, "total_steps": 4048, "loss": 0.33103084564208984, "lr": 1.1153564898275184e-05, 
"epoch": 0.9810947732608427, "percentage": 49.04, "elapsed_time": "7:10:54", "remaining_time": "7:27:50"} +{"current_steps": 1986, "total_steps": 4048, "loss": 0.3804841637611389, "lr": 1.1145448467642021e-05, "epoch": 0.9815890275546769, "percentage": 49.06, "elapsed_time": "7:11:08", "remaining_time": "7:27:38"} +{"current_steps": 1987, "total_steps": 4048, "loss": 0.31861352920532227, "lr": 1.1137331272323834e-05, "epoch": 0.982083281848511, "percentage": 49.09, "elapsed_time": "7:11:21", "remaining_time": "7:27:25"} +{"current_steps": 1988, "total_steps": 4048, "loss": 0.3022298216819763, "lr": 1.1129213317739539e-05, "epoch": 0.9825775361423452, "percentage": 49.11, "elapsed_time": "7:11:34", "remaining_time": "7:27:12"} +{"current_steps": 1989, "total_steps": 4048, "loss": 0.38203683495521545, "lr": 1.1121094609308564e-05, "epoch": 0.9830717904361794, "percentage": 49.14, "elapsed_time": "7:11:47", "remaining_time": "7:26:59"} +{"current_steps": 1990, "total_steps": 4048, "loss": 0.3105717897415161, "lr": 1.1112975152450848e-05, "epoch": 0.9835660447300136, "percentage": 49.16, "elapsed_time": "7:12:00", "remaining_time": "7:26:46"} +{"current_steps": 1991, "total_steps": 4048, "loss": 0.31930285692214966, "lr": 1.1104854952586827e-05, "epoch": 0.9840602990238477, "percentage": 49.18, "elapsed_time": "7:12:14", "remaining_time": "7:26:33"} +{"current_steps": 1992, "total_steps": 4048, "loss": 0.3167966902256012, "lr": 1.1096734015137422e-05, "epoch": 0.9845545533176819, "percentage": 49.21, "elapsed_time": "7:12:27", "remaining_time": "7:26:20"} +{"current_steps": 1993, "total_steps": 4048, "loss": 0.2693050801753998, "lr": 1.1088612345524059e-05, "epoch": 0.9850488076115161, "percentage": 49.23, "elapsed_time": "7:12:40", "remaining_time": "7:26:08"} +{"current_steps": 1994, "total_steps": 4048, "loss": 0.27986466884613037, "lr": 1.1080489949168651e-05, "epoch": 0.9855430619053503, "percentage": 49.26, "elapsed_time": "7:12:53", "remaining_time": "7:25:55"} 
+{"current_steps": 1995, "total_steps": 4048, "loss": 0.26814526319503784, "lr": 1.1072366831493589e-05, "epoch": 0.9860373161991844, "percentage": 49.28, "elapsed_time": "7:13:07", "remaining_time": "7:25:43"} +{"current_steps": 1996, "total_steps": 4048, "loss": 0.31393951177597046, "lr": 1.1064242997921753e-05, "epoch": 0.9865315704930187, "percentage": 49.31, "elapsed_time": "7:13:20", "remaining_time": "7:25:30"} +{"current_steps": 1997, "total_steps": 4048, "loss": 0.2958461344242096, "lr": 1.1056118453876496e-05, "epoch": 0.9870258247868529, "percentage": 49.33, "elapsed_time": "7:13:34", "remaining_time": "7:25:17"} +{"current_steps": 1998, "total_steps": 4048, "loss": 0.29744619131088257, "lr": 1.1047993204781652e-05, "epoch": 0.9875200790806871, "percentage": 49.36, "elapsed_time": "7:13:47", "remaining_time": "7:25:04"} +{"current_steps": 1999, "total_steps": 4048, "loss": 0.29055094718933105, "lr": 1.1039867256061516e-05, "epoch": 0.9880143333745212, "percentage": 49.38, "elapsed_time": "7:14:00", "remaining_time": "7:24:52"} +{"current_steps": 2000, "total_steps": 4048, "loss": 0.29961663484573364, "lr": 1.103174061314086e-05, "epoch": 0.9885085876683554, "percentage": 49.41, "elapsed_time": "7:14:13", "remaining_time": "7:24:38"} +{"current_steps": 2001, "total_steps": 4048, "loss": 0.34533610939979553, "lr": 1.102361328144491e-05, "epoch": 0.9890028419621896, "percentage": 49.43, "elapsed_time": "7:14:32", "remaining_time": "7:24:32"} +{"current_steps": 2002, "total_steps": 4048, "loss": 0.2994460463523865, "lr": 1.1015485266399362e-05, "epoch": 0.9894970962560238, "percentage": 49.46, "elapsed_time": "7:14:45", "remaining_time": "7:24:19"} +{"current_steps": 2003, "total_steps": 4048, "loss": 0.34309566020965576, "lr": 1.1007356573430357e-05, "epoch": 0.9899913505498579, "percentage": 49.48, "elapsed_time": "7:14:59", "remaining_time": "7:24:06"} +{"current_steps": 2004, "total_steps": 4048, "loss": 0.3049868643283844, "lr": 1.09992272079645e-05, 
"epoch": 0.9904856048436921, "percentage": 49.51, "elapsed_time": "7:15:12", "remaining_time": "7:23:53"} +{"current_steps": 2005, "total_steps": 4048, "loss": 0.30586326122283936, "lr": 1.0991097175428833e-05, "epoch": 0.9909798591375263, "percentage": 49.53, "elapsed_time": "7:15:25", "remaining_time": "7:23:40"} +{"current_steps": 2006, "total_steps": 4048, "loss": 0.29740482568740845, "lr": 1.0982966481250854e-05, "epoch": 0.9914741134313604, "percentage": 49.56, "elapsed_time": "7:15:38", "remaining_time": "7:23:27"} +{"current_steps": 2007, "total_steps": 4048, "loss": 0.3218206465244293, "lr": 1.0974835130858497e-05, "epoch": 0.9919683677251946, "percentage": 49.58, "elapsed_time": "7:15:51", "remaining_time": "7:23:14"} +{"current_steps": 2008, "total_steps": 4048, "loss": 0.2747582495212555, "lr": 1.0966703129680139e-05, "epoch": 0.9924626220190288, "percentage": 49.6, "elapsed_time": "7:16:04", "remaining_time": "7:23:01"} +{"current_steps": 2009, "total_steps": 4048, "loss": 0.33215245604515076, "lr": 1.0958570483144578e-05, "epoch": 0.992956876312863, "percentage": 49.63, "elapsed_time": "7:16:17", "remaining_time": "7:22:48"} +{"current_steps": 2010, "total_steps": 4048, "loss": 0.3149756193161011, "lr": 1.0950437196681061e-05, "epoch": 0.9934511306066971, "percentage": 49.65, "elapsed_time": "7:16:30", "remaining_time": "7:22:35"} +{"current_steps": 2011, "total_steps": 4048, "loss": 0.2763513922691345, "lr": 1.0942303275719253e-05, "epoch": 0.9939453849005313, "percentage": 49.68, "elapsed_time": "7:16:43", "remaining_time": "7:22:22"} +{"current_steps": 2012, "total_steps": 4048, "loss": 0.2818325161933899, "lr": 1.0934168725689239e-05, "epoch": 0.9944396391943655, "percentage": 49.7, "elapsed_time": "7:16:57", "remaining_time": "7:22:10"} +{"current_steps": 2013, "total_steps": 4048, "loss": 0.2659858167171478, "lr": 1.0926033552021533e-05, "epoch": 0.9949338934881997, "percentage": 49.73, "elapsed_time": "7:17:10", "remaining_time": "7:21:57"} 
+{"current_steps": 2014, "total_steps": 4048, "loss": 0.30891451239585876, "lr": 1.091789776014706e-05, "epoch": 0.9954281477820338, "percentage": 49.75, "elapsed_time": "7:17:24", "remaining_time": "7:21:44"} +{"current_steps": 2015, "total_steps": 4048, "loss": 0.33645111322402954, "lr": 1.0909761355497156e-05, "epoch": 0.995922402075868, "percentage": 49.78, "elapsed_time": "7:17:37", "remaining_time": "7:21:31"} +{"current_steps": 2016, "total_steps": 4048, "loss": 0.3086194097995758, "lr": 1.0901624343503571e-05, "epoch": 0.9964166563697022, "percentage": 49.8, "elapsed_time": "7:17:50", "remaining_time": "7:21:19"} +{"current_steps": 2017, "total_steps": 4048, "loss": 0.2614179253578186, "lr": 1.089348672959846e-05, "epoch": 0.9969109106635364, "percentage": 49.83, "elapsed_time": "7:18:04", "remaining_time": "7:21:06"} +{"current_steps": 2018, "total_steps": 4048, "loss": 0.3300556540489197, "lr": 1.088534851921437e-05, "epoch": 0.9974051649573705, "percentage": 49.85, "elapsed_time": "7:18:17", "remaining_time": "7:20:54"} +{"current_steps": 2019, "total_steps": 4048, "loss": 0.28443643450737, "lr": 1.087720971778426e-05, "epoch": 0.9978994192512047, "percentage": 49.88, "elapsed_time": "7:18:30", "remaining_time": "7:20:41"} +{"current_steps": 2020, "total_steps": 4048, "loss": 0.2805534601211548, "lr": 1.0869070330741475e-05, "epoch": 0.9983936735450389, "percentage": 49.9, "elapsed_time": "7:18:43", "remaining_time": "7:20:28"} +{"current_steps": 2021, "total_steps": 4048, "loss": 0.28186699748039246, "lr": 1.0860930363519758e-05, "epoch": 0.9988879278388731, "percentage": 49.93, "elapsed_time": "7:18:57", "remaining_time": "7:20:15"} +{"current_steps": 2022, "total_steps": 4048, "loss": 0.3527688980102539, "lr": 1.0852789821553228e-05, "epoch": 0.9993821821327072, "percentage": 49.95, "elapsed_time": "7:19:10", "remaining_time": "7:20:02"} +{"current_steps": 2023, "total_steps": 4048, "loss": 0.30708247423171997, "lr": 1.08446487102764e-05, "epoch": 
0.9998764364265414, "percentage": 49.98, "elapsed_time": "7:19:23", "remaining_time": "7:19:49"} +{"current_steps": 2024, "total_steps": 4048, "loss": 0.3015655279159546, "lr": 1.083650703512416e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "7:19:26", "remaining_time": "7:19:26"} +{"current_steps": 2025, "total_steps": 4048, "loss": 0.29792484641075134, "lr": 1.0828364801531777e-05, "epoch": 1.0004942542938342, "percentage": 50.02, "elapsed_time": "7:19:39", "remaining_time": "7:19:13"} +{"current_steps": 2026, "total_steps": 4048, "loss": 0.27995994687080383, "lr": 1.0820222014934887e-05, "epoch": 1.0009885085876684, "percentage": 50.05, "elapsed_time": "7:19:53", "remaining_time": "7:19:01"} +{"current_steps": 2027, "total_steps": 4048, "loss": 0.25797444581985474, "lr": 1.0812078680769501e-05, "epoch": 1.0014827628815026, "percentage": 50.07, "elapsed_time": "7:20:06", "remaining_time": "7:18:48"} +{"current_steps": 2028, "total_steps": 4048, "loss": 0.2834373116493225, "lr": 1.0803934804471991e-05, "epoch": 1.0019770171753366, "percentage": 50.1, "elapsed_time": "7:20:20", "remaining_time": "7:18:35"} +{"current_steps": 2029, "total_steps": 4048, "loss": 0.27055832743644714, "lr": 1.079579039147909e-05, "epoch": 1.0024712714691708, "percentage": 50.12, "elapsed_time": "7:20:33", "remaining_time": "7:18:22"} +{"current_steps": 2030, "total_steps": 4048, "loss": 0.30029311776161194, "lr": 1.0787645447227897e-05, "epoch": 1.002965525763005, "percentage": 50.15, "elapsed_time": "7:20:46", "remaining_time": "7:18:10"} +{"current_steps": 2031, "total_steps": 4048, "loss": 0.2741442322731018, "lr": 1.0779499977155858e-05, "epoch": 1.0034597800568392, "percentage": 50.17, "elapsed_time": "7:20:59", "remaining_time": "7:17:57"} +{"current_steps": 2032, "total_steps": 4048, "loss": 0.27097994089126587, "lr": 1.0771353986700767e-05, "epoch": 1.0039540343506734, "percentage": 50.2, "elapsed_time": "7:21:13", "remaining_time": "7:17:44"} +{"current_steps": 2033, 
"total_steps": 4048, "loss": 0.2690125107765198, "lr": 1.0763207481300781e-05, "epoch": 1.0044482886445076, "percentage": 50.22, "elapsed_time": "7:21:25", "remaining_time": "7:17:31"} +{"current_steps": 2034, "total_steps": 4048, "loss": 0.29656079411506653, "lr": 1.0755060466394383e-05, "epoch": 1.0049425429383418, "percentage": 50.25, "elapsed_time": "7:21:39", "remaining_time": "7:17:18"} +{"current_steps": 2035, "total_steps": 4048, "loss": 0.25291675329208374, "lr": 1.0746912947420407e-05, "epoch": 1.005436797232176, "percentage": 50.27, "elapsed_time": "7:21:52", "remaining_time": "7:17:05"} +{"current_steps": 2036, "total_steps": 4048, "loss": 0.26391562819480896, "lr": 1.0738764929818017e-05, "epoch": 1.00593105152601, "percentage": 50.3, "elapsed_time": "7:22:04", "remaining_time": "7:16:52"} +{"current_steps": 2037, "total_steps": 4048, "loss": 0.2850308418273926, "lr": 1.073061641902672e-05, "epoch": 1.0064253058198442, "percentage": 50.32, "elapsed_time": "7:22:18", "remaining_time": "7:16:39"} +{"current_steps": 2038, "total_steps": 4048, "loss": 0.2529013454914093, "lr": 1.0722467420486338e-05, "epoch": 1.0069195601136784, "percentage": 50.35, "elapsed_time": "7:22:31", "remaining_time": "7:16:26"} +{"current_steps": 2039, "total_steps": 4048, "loss": 0.2577154040336609, "lr": 1.0714317939637028e-05, "epoch": 1.0074138144075127, "percentage": 50.37, "elapsed_time": "7:22:44", "remaining_time": "7:16:14"} +{"current_steps": 2040, "total_steps": 4048, "loss": 0.28677526116371155, "lr": 1.0706167981919269e-05, "epoch": 1.0079080687013469, "percentage": 50.4, "elapsed_time": "7:22:57", "remaining_time": "7:16:00"} +{"current_steps": 2041, "total_steps": 4048, "loss": 0.25146183371543884, "lr": 1.0698017552773859e-05, "epoch": 1.008402322995181, "percentage": 50.42, "elapsed_time": "7:23:11", "remaining_time": "7:15:48"} +{"current_steps": 2042, "total_steps": 4048, "loss": 0.29958251118659973, "lr": 1.0689866657641899e-05, "epoch": 1.0088965772890153, 
"percentage": 50.44, "elapsed_time": "7:23:24", "remaining_time": "7:15:35"} +{"current_steps": 2043, "total_steps": 4048, "loss": 0.28512266278266907, "lr": 1.0681715301964817e-05, "epoch": 1.0093908315828495, "percentage": 50.47, "elapsed_time": "7:23:37", "remaining_time": "7:15:22"} +{"current_steps": 2044, "total_steps": 4048, "loss": 0.29768145084381104, "lr": 1.067356349118434e-05, "epoch": 1.0098850858766835, "percentage": 50.49, "elapsed_time": "7:23:50", "remaining_time": "7:15:09"} +{"current_steps": 2045, "total_steps": 4048, "loss": 0.25144103169441223, "lr": 1.0665411230742498e-05, "epoch": 1.0103793401705177, "percentage": 50.52, "elapsed_time": "7:24:03", "remaining_time": "7:14:56"} +{"current_steps": 2046, "total_steps": 4048, "loss": 0.2673259973526001, "lr": 1.0657258526081629e-05, "epoch": 1.0108735944643519, "percentage": 50.54, "elapsed_time": "7:24:16", "remaining_time": "7:14:43"} +{"current_steps": 2047, "total_steps": 4048, "loss": 0.2845848500728607, "lr": 1.0649105382644359e-05, "epoch": 1.011367848758186, "percentage": 50.57, "elapsed_time": "7:24:29", "remaining_time": "7:14:30"} +{"current_steps": 2048, "total_steps": 4048, "loss": 0.2569392919540405, "lr": 1.0640951805873607e-05, "epoch": 1.0118621030520203, "percentage": 50.59, "elapsed_time": "7:24:42", "remaining_time": "7:14:17"} +{"current_steps": 2049, "total_steps": 4048, "loss": 0.250387966632843, "lr": 1.0632797801212591e-05, "epoch": 1.0123563573458545, "percentage": 50.62, "elapsed_time": "7:24:55", "remaining_time": "7:14:04"} +{"current_steps": 2050, "total_steps": 4048, "loss": 0.28228282928466797, "lr": 1.0624643374104804e-05, "epoch": 1.0128506116396887, "percentage": 50.64, "elapsed_time": "7:25:09", "remaining_time": "7:13:52"} +{"current_steps": 2051, "total_steps": 4048, "loss": 0.24724754691123962, "lr": 1.0616488529994024e-05, "epoch": 1.0133448659335227, "percentage": 50.67, "elapsed_time": "7:25:22", "remaining_time": "7:13:39"} +{"current_steps": 2052, 
"total_steps": 4048, "loss": 0.268532395362854, "lr": 1.0608333274324312e-05, "epoch": 1.013839120227357, "percentage": 50.69, "elapsed_time": "7:25:36", "remaining_time": "7:13:26"} +{"current_steps": 2053, "total_steps": 4048, "loss": 0.27454662322998047, "lr": 1.0600177612539995e-05, "epoch": 1.014333374521191, "percentage": 50.72, "elapsed_time": "7:25:49", "remaining_time": "7:13:13"} +{"current_steps": 2054, "total_steps": 4048, "loss": 0.27497538924217224, "lr": 1.0592021550085683e-05, "epoch": 1.0148276288150253, "percentage": 50.74, "elapsed_time": "7:26:03", "remaining_time": "7:13:01"} +{"current_steps": 2055, "total_steps": 4048, "loss": 0.24480152130126953, "lr": 1.0583865092406237e-05, "epoch": 1.0153218831088595, "percentage": 50.77, "elapsed_time": "7:26:15", "remaining_time": "7:12:48"} +{"current_steps": 2056, "total_steps": 4048, "loss": 0.23754069209098816, "lr": 1.0575708244946805e-05, "epoch": 1.0158161374026937, "percentage": 50.79, "elapsed_time": "7:26:29", "remaining_time": "7:12:35"} +{"current_steps": 2057, "total_steps": 4048, "loss": 0.24541275203227997, "lr": 1.056755101315277e-05, "epoch": 1.016310391696528, "percentage": 50.82, "elapsed_time": "7:26:42", "remaining_time": "7:12:22"} +{"current_steps": 2058, "total_steps": 4048, "loss": 0.27724504470825195, "lr": 1.055939340246979e-05, "epoch": 1.0168046459903621, "percentage": 50.84, "elapsed_time": "7:26:56", "remaining_time": "7:12:10"} +{"current_steps": 2059, "total_steps": 4048, "loss": 0.2869918942451477, "lr": 1.0551235418343766e-05, "epoch": 1.0172989002841961, "percentage": 50.86, "elapsed_time": "7:27:09", "remaining_time": "7:11:56"} +{"current_steps": 2060, "total_steps": 4048, "loss": 0.27153679728507996, "lr": 1.0543077066220854e-05, "epoch": 1.0177931545780303, "percentage": 50.89, "elapsed_time": "7:27:22", "remaining_time": "7:11:44"} +{"current_steps": 2061, "total_steps": 4048, "loss": 0.2611347436904907, "lr": 1.0534918351547454e-05, "epoch": 1.0182874088718645, 
"percentage": 50.91, "elapsed_time": "7:27:35", "remaining_time": "7:11:31"} +{"current_steps": 2062, "total_steps": 4048, "loss": 0.26649200916290283, "lr": 1.0526759279770202e-05, "epoch": 1.0187816631656987, "percentage": 50.94, "elapsed_time": "7:27:49", "remaining_time": "7:11:19"} +{"current_steps": 2063, "total_steps": 4048, "loss": 0.25164204835891724, "lr": 1.0518599856335983e-05, "epoch": 1.019275917459533, "percentage": 50.96, "elapsed_time": "7:28:02", "remaining_time": "7:11:06"} +{"current_steps": 2064, "total_steps": 4048, "loss": 0.288251131772995, "lr": 1.0510440086691911e-05, "epoch": 1.0197701717533671, "percentage": 50.99, "elapsed_time": "7:28:16", "remaining_time": "7:10:53"} +{"current_steps": 2065, "total_steps": 4048, "loss": 0.27177444100379944, "lr": 1.0502279976285325e-05, "epoch": 1.0202644260472014, "percentage": 51.01, "elapsed_time": "7:28:29", "remaining_time": "7:10:41"} +{"current_steps": 2066, "total_steps": 4048, "loss": 0.2723502218723297, "lr": 1.0494119530563812e-05, "epoch": 1.0207586803410356, "percentage": 51.04, "elapsed_time": "7:28:42", "remaining_time": "7:10:27"} +{"current_steps": 2067, "total_steps": 4048, "loss": 0.2704971432685852, "lr": 1.0485958754975156e-05, "epoch": 1.0212529346348695, "percentage": 51.06, "elapsed_time": "7:28:56", "remaining_time": "7:10:15"} +{"current_steps": 2068, "total_steps": 4048, "loss": 0.30302050709724426, "lr": 1.0477797654967376e-05, "epoch": 1.0217471889287038, "percentage": 51.09, "elapsed_time": "7:29:09", "remaining_time": "7:10:02"} +{"current_steps": 2069, "total_steps": 4048, "loss": 0.26408523321151733, "lr": 1.0469636235988711e-05, "epoch": 1.022241443222538, "percentage": 51.11, "elapsed_time": "7:29:22", "remaining_time": "7:09:49"} +{"current_steps": 2070, "total_steps": 4048, "loss": 0.2691786289215088, "lr": 1.0461474503487606e-05, "epoch": 1.0227356975163722, "percentage": 51.14, "elapsed_time": "7:29:35", "remaining_time": "7:09:36"} +{"current_steps": 2071, 
"total_steps": 4048, "loss": 0.2823137640953064, "lr": 1.0453312462912714e-05, "epoch": 1.0232299518102064, "percentage": 51.16, "elapsed_time": "7:29:49", "remaining_time": "7:09:24"} +{"current_steps": 2072, "total_steps": 4048, "loss": 0.28837013244628906, "lr": 1.04451501197129e-05, "epoch": 1.0237242061040406, "percentage": 51.19, "elapsed_time": "7:30:02", "remaining_time": "7:09:11"} +{"current_steps": 2073, "total_steps": 4048, "loss": 0.2809562683105469, "lr": 1.0436987479337229e-05, "epoch": 1.0242184603978748, "percentage": 51.21, "elapsed_time": "7:30:15", "remaining_time": "7:08:58"} +{"current_steps": 2074, "total_steps": 4048, "loss": 0.2604525685310364, "lr": 1.0428824547234956e-05, "epoch": 1.024712714691709, "percentage": 51.24, "elapsed_time": "7:30:28", "remaining_time": "7:08:45"} +{"current_steps": 2075, "total_steps": 4048, "loss": 0.24755606055259705, "lr": 1.0420661328855546e-05, "epoch": 1.025206968985543, "percentage": 51.26, "elapsed_time": "7:30:42", "remaining_time": "7:08:32"} +{"current_steps": 2076, "total_steps": 4048, "loss": 0.2592730224132538, "lr": 1.0412497829648642e-05, "epoch": 1.0257012232793772, "percentage": 51.28, "elapsed_time": "7:30:55", "remaining_time": "7:08:20"} +{"current_steps": 2077, "total_steps": 4048, "loss": 0.2693594694137573, "lr": 1.0404334055064083e-05, "epoch": 1.0261954775732114, "percentage": 51.31, "elapsed_time": "7:31:08", "remaining_time": "7:08:07"} +{"current_steps": 2078, "total_steps": 4048, "loss": 0.2712753117084503, "lr": 1.0396170010551881e-05, "epoch": 1.0266897318670456, "percentage": 51.33, "elapsed_time": "7:31:22", "remaining_time": "7:07:54"} +{"current_steps": 2079, "total_steps": 4048, "loss": 0.2693077027797699, "lr": 1.0388005701562245e-05, "epoch": 1.0271839861608798, "percentage": 51.36, "elapsed_time": "7:31:35", "remaining_time": "7:07:41"} +{"current_steps": 2080, "total_steps": 4048, "loss": 0.2791144847869873, "lr": 1.0379841133545544e-05, "epoch": 1.027678240454714, 
"percentage": 51.38, "elapsed_time": "7:31:48", "remaining_time": "7:07:28"} +{"current_steps": 2081, "total_steps": 4048, "loss": 0.27496254444122314, "lr": 1.037167631195233e-05, "epoch": 1.0281724947485482, "percentage": 51.41, "elapsed_time": "7:32:01", "remaining_time": "7:07:16"} +{"current_steps": 2082, "total_steps": 4048, "loss": 0.26037347316741943, "lr": 1.0363511242233322e-05, "epoch": 1.0286667490423822, "percentage": 51.43, "elapsed_time": "7:32:15", "remaining_time": "7:07:03"} +{"current_steps": 2083, "total_steps": 4048, "loss": 0.2610514760017395, "lr": 1.0355345929839402e-05, "epoch": 1.0291610033362164, "percentage": 51.46, "elapsed_time": "7:32:28", "remaining_time": "7:06:50"} +{"current_steps": 2084, "total_steps": 4048, "loss": 0.24750857055187225, "lr": 1.0347180380221618e-05, "epoch": 1.0296552576300506, "percentage": 51.48, "elapsed_time": "7:32:41", "remaining_time": "7:06:37"} +{"current_steps": 2085, "total_steps": 4048, "loss": 0.2835415303707123, "lr": 1.0339014598831169e-05, "epoch": 1.0301495119238848, "percentage": 51.51, "elapsed_time": "7:32:54", "remaining_time": "7:06:24"} +{"current_steps": 2086, "total_steps": 4048, "loss": 0.25762057304382324, "lr": 1.033084859111942e-05, "epoch": 1.030643766217719, "percentage": 51.53, "elapsed_time": "7:33:08", "remaining_time": "7:06:12"} +{"current_steps": 2087, "total_steps": 4048, "loss": 0.2818237841129303, "lr": 1.032268236253788e-05, "epoch": 1.0311380205115532, "percentage": 51.56, "elapsed_time": "7:33:21", "remaining_time": "7:05:59"} +{"current_steps": 2088, "total_steps": 4048, "loss": 0.27192944288253784, "lr": 1.0314515918538202e-05, "epoch": 1.0316322748053874, "percentage": 51.58, "elapsed_time": "7:33:34", "remaining_time": "7:05:46"} +{"current_steps": 2089, "total_steps": 4048, "loss": 0.3002319931983948, "lr": 1.0306349264572195e-05, "epoch": 1.0321265290992216, "percentage": 51.61, "elapsed_time": "7:33:47", "remaining_time": "7:05:33"} +{"current_steps": 2090, 
"total_steps": 4048, "loss": 0.27106401324272156, "lr": 1.0298182406091794e-05, "epoch": 1.0326207833930556, "percentage": 51.63, "elapsed_time": "7:34:00", "remaining_time": "7:05:20"} +{"current_steps": 2091, "total_steps": 4048, "loss": 0.2740558385848999, "lr": 1.0290015348549076e-05, "epoch": 1.0331150376868898, "percentage": 51.66, "elapsed_time": "7:34:14", "remaining_time": "7:05:07"} +{"current_steps": 2092, "total_steps": 4048, "loss": 0.2970008850097656, "lr": 1.0281848097396261e-05, "epoch": 1.033609291980724, "percentage": 51.68, "elapsed_time": "7:34:27", "remaining_time": "7:04:54"} +{"current_steps": 2093, "total_steps": 4048, "loss": 0.27684125304222107, "lr": 1.027368065808568e-05, "epoch": 1.0341035462745582, "percentage": 51.7, "elapsed_time": "7:34:40", "remaining_time": "7:04:42"} +{"current_steps": 2094, "total_steps": 4048, "loss": 0.2732700705528259, "lr": 1.0265513036069803e-05, "epoch": 1.0345978005683925, "percentage": 51.73, "elapsed_time": "7:34:53", "remaining_time": "7:04:29"} +{"current_steps": 2095, "total_steps": 4048, "loss": 0.25189805030822754, "lr": 1.0257345236801215e-05, "epoch": 1.0350920548622267, "percentage": 51.75, "elapsed_time": "7:35:07", "remaining_time": "7:04:16"} +{"current_steps": 2096, "total_steps": 4048, "loss": 0.3177054524421692, "lr": 1.0249177265732629e-05, "epoch": 1.0355863091560609, "percentage": 51.78, "elapsed_time": "7:35:20", "remaining_time": "7:04:03"} +{"current_steps": 2097, "total_steps": 4048, "loss": 0.23350921273231506, "lr": 1.0241009128316854e-05, "epoch": 1.036080563449895, "percentage": 51.8, "elapsed_time": "7:35:34", "remaining_time": "7:03:51"} +{"current_steps": 2098, "total_steps": 4048, "loss": 0.3011140525341034, "lr": 1.0232840830006832e-05, "epoch": 1.036574817743729, "percentage": 51.83, "elapsed_time": "7:35:46", "remaining_time": "7:03:37"} +{"current_steps": 2099, "total_steps": 4048, "loss": 0.2578561305999756, "lr": 1.0224672376255598e-05, "epoch": 1.0370690720375633, 
"percentage": 51.85, "elapsed_time": "7:36:00", "remaining_time": "7:03:25"} +{"current_steps": 2100, "total_steps": 4048, "loss": 0.2622804045677185, "lr": 1.0216503772516297e-05, "epoch": 1.0375633263313975, "percentage": 51.88, "elapsed_time": "7:36:13", "remaining_time": "7:03:11"} +{"current_steps": 2101, "total_steps": 4048, "loss": 0.2662869691848755, "lr": 1.0208335024242169e-05, "epoch": 1.0380575806252317, "percentage": 51.9, "elapsed_time": "7:36:32", "remaining_time": "7:03:04"} +{"current_steps": 2102, "total_steps": 4048, "loss": 0.27084922790527344, "lr": 1.0200166136886558e-05, "epoch": 1.0385518349190659, "percentage": 51.93, "elapsed_time": "7:36:45", "remaining_time": "7:02:51"} +{"current_steps": 2103, "total_steps": 4048, "loss": 0.26290780305862427, "lr": 1.0191997115902891e-05, "epoch": 1.0390460892129, "percentage": 51.95, "elapsed_time": "7:36:58", "remaining_time": "7:02:38"} +{"current_steps": 2104, "total_steps": 4048, "loss": 0.27367106080055237, "lr": 1.0183827966744694e-05, "epoch": 1.0395403435067343, "percentage": 51.98, "elapsed_time": "7:37:12", "remaining_time": "7:02:26"} +{"current_steps": 2105, "total_steps": 4048, "loss": 0.28507113456726074, "lr": 1.0175658694865574e-05, "epoch": 1.0400345978005685, "percentage": 52.0, "elapsed_time": "7:37:25", "remaining_time": "7:02:13"} +{"current_steps": 2106, "total_steps": 4048, "loss": 0.2533179521560669, "lr": 1.0167489305719221e-05, "epoch": 1.0405288520944025, "percentage": 52.03, "elapsed_time": "7:37:38", "remaining_time": "7:02:00"} +{"current_steps": 2107, "total_steps": 4048, "loss": 0.28755924105644226, "lr": 1.0159319804759398e-05, "epoch": 1.0410231063882367, "percentage": 52.05, "elapsed_time": "7:37:51", "remaining_time": "7:01:46"} +{"current_steps": 2108, "total_steps": 4048, "loss": 0.26722773909568787, "lr": 1.015115019743995e-05, "epoch": 1.041517360682071, "percentage": 52.08, "elapsed_time": "7:38:04", "remaining_time": "7:01:34"} +{"current_steps": 2109, 
"total_steps": 4048, "loss": 0.3122308850288391, "lr": 1.0142980489214788e-05, "epoch": 1.042011614975905, "percentage": 52.1, "elapsed_time": "7:38:17", "remaining_time": "7:01:20"} +{"current_steps": 2110, "total_steps": 4048, "loss": 0.22603261470794678, "lr": 1.0134810685537899e-05, "epoch": 1.0425058692697393, "percentage": 52.12, "elapsed_time": "7:38:30", "remaining_time": "7:01:08"} +{"current_steps": 2111, "total_steps": 4048, "loss": 0.2823299169540405, "lr": 1.0126640791863316e-05, "epoch": 1.0430001235635735, "percentage": 52.15, "elapsed_time": "7:38:43", "remaining_time": "7:00:54"} +{"current_steps": 2112, "total_steps": 4048, "loss": 0.30999040603637695, "lr": 1.0118470813645156e-05, "epoch": 1.0434943778574077, "percentage": 52.17, "elapsed_time": "7:38:56", "remaining_time": "7:00:42"} +{"current_steps": 2113, "total_steps": 4048, "loss": 0.266022264957428, "lr": 1.0110300756337569e-05, "epoch": 1.0439886321512417, "percentage": 52.2, "elapsed_time": "7:39:09", "remaining_time": "7:00:28"} +{"current_steps": 2114, "total_steps": 4048, "loss": 0.2674095034599304, "lr": 1.0102130625394776e-05, "epoch": 1.044482886445076, "percentage": 52.22, "elapsed_time": "7:39:23", "remaining_time": "7:00:16"} +{"current_steps": 2115, "total_steps": 4048, "loss": 0.30045652389526367, "lr": 1.0093960426271037e-05, "epoch": 1.0449771407389101, "percentage": 52.25, "elapsed_time": "7:39:35", "remaining_time": "7:00:02"} +{"current_steps": 2116, "total_steps": 4048, "loss": 0.28455668687820435, "lr": 1.0085790164420659e-05, "epoch": 1.0454713950327443, "percentage": 52.27, "elapsed_time": "7:39:48", "remaining_time": "6:59:49"} +{"current_steps": 2117, "total_steps": 4048, "loss": 0.2429066300392151, "lr": 1.0077619845297992e-05, "epoch": 1.0459656493265785, "percentage": 52.3, "elapsed_time": "7:40:01", "remaining_time": "6:59:36"} +{"current_steps": 2118, "total_steps": 4048, "loss": 0.2515121102333069, "lr": 1.0069449474357427e-05, "epoch": 1.0464599036204127, 
"percentage": 52.32, "elapsed_time": "7:40:14", "remaining_time": "6:59:23"} +{"current_steps": 2119, "total_steps": 4048, "loss": 0.30011802911758423, "lr": 1.0061279057053385e-05, "epoch": 1.046954157914247, "percentage": 52.35, "elapsed_time": "7:40:27", "remaining_time": "6:59:10"} +{"current_steps": 2120, "total_steps": 4048, "loss": 0.2577645480632782, "lr": 1.005310859884032e-05, "epoch": 1.0474484122080812, "percentage": 52.37, "elapsed_time": "7:40:39", "remaining_time": "6:58:56"} +{"current_steps": 2121, "total_steps": 4048, "loss": 0.21476465463638306, "lr": 1.0044938105172713e-05, "epoch": 1.0479426665019151, "percentage": 52.4, "elapsed_time": "7:40:52", "remaining_time": "6:58:43"} +{"current_steps": 2122, "total_steps": 4048, "loss": 0.2587023079395294, "lr": 1.0036767581505067e-05, "epoch": 1.0484369207957493, "percentage": 52.42, "elapsed_time": "7:41:05", "remaining_time": "6:58:30"} +{"current_steps": 2123, "total_steps": 4048, "loss": 0.2537185251712799, "lr": 1.0028597033291911e-05, "epoch": 1.0489311750895836, "percentage": 52.45, "elapsed_time": "7:41:18", "remaining_time": "6:58:17"} +{"current_steps": 2124, "total_steps": 4048, "loss": 0.24486014246940613, "lr": 1.0020426465987782e-05, "epoch": 1.0494254293834178, "percentage": 52.47, "elapsed_time": "7:41:31", "remaining_time": "6:58:03"} +{"current_steps": 2125, "total_steps": 4048, "loss": 0.2728436589241028, "lr": 1.0012255885047241e-05, "epoch": 1.049919683677252, "percentage": 52.5, "elapsed_time": "7:41:44", "remaining_time": "6:57:51"} +{"current_steps": 2126, "total_steps": 4048, "loss": 0.30238842964172363, "lr": 1.0004085295924843e-05, "epoch": 1.0504139379710862, "percentage": 52.52, "elapsed_time": "7:41:57", "remaining_time": "6:57:38"} +{"current_steps": 2127, "total_steps": 4048, "loss": 0.30347609519958496, "lr": 9.99591470407516e-06, "epoch": 1.0509081922649204, "percentage": 52.54, "elapsed_time": "7:42:10", "remaining_time": "6:57:25"} +{"current_steps": 2128, 
"total_steps": 4048, "loss": 0.2581411302089691, "lr": 9.987744114952764e-06, "epoch": 1.0514024465587544, "percentage": 52.57, "elapsed_time": "7:42:23", "remaining_time": "6:57:12"} +{"current_steps": 2129, "total_steps": 4048, "loss": 0.239881694316864, "lr": 9.979573534012218e-06, "epoch": 1.0518967008525886, "percentage": 52.59, "elapsed_time": "7:42:37", "remaining_time": "6:56:59"} +{"current_steps": 2130, "total_steps": 4048, "loss": 0.3058615028858185, "lr": 9.971402966708092e-06, "epoch": 1.0523909551464228, "percentage": 52.62, "elapsed_time": "7:42:50", "remaining_time": "6:56:46"} +{"current_steps": 2131, "total_steps": 4048, "loss": 0.25285837054252625, "lr": 9.963232418494936e-06, "epoch": 1.052885209440257, "percentage": 52.64, "elapsed_time": "7:43:03", "remaining_time": "6:56:33"} +{"current_steps": 2132, "total_steps": 4048, "loss": 0.27366510033607483, "lr": 9.955061894827294e-06, "epoch": 1.0533794637340912, "percentage": 52.67, "elapsed_time": "7:43:16", "remaining_time": "6:56:20"} +{"current_steps": 2133, "total_steps": 4048, "loss": 0.22268086671829224, "lr": 9.946891401159683e-06, "epoch": 1.0538737180279254, "percentage": 52.69, "elapsed_time": "7:43:29", "remaining_time": "6:56:07"} +{"current_steps": 2134, "total_steps": 4048, "loss": 0.2540682554244995, "lr": 9.938720942946616e-06, "epoch": 1.0543679723217596, "percentage": 52.72, "elapsed_time": "7:43:42", "remaining_time": "6:55:54"} +{"current_steps": 2135, "total_steps": 4048, "loss": 0.262179970741272, "lr": 9.930550525642576e-06, "epoch": 1.0548622266155938, "percentage": 52.74, "elapsed_time": "7:43:56", "remaining_time": "6:55:41"} +{"current_steps": 2136, "total_steps": 4048, "loss": 0.25471946597099304, "lr": 9.92238015470201e-06, "epoch": 1.0553564809094278, "percentage": 52.77, "elapsed_time": "7:44:09", "remaining_time": "6:55:28"} +{"current_steps": 2137, "total_steps": 4048, "loss": 0.2580556571483612, "lr": 9.914209835579344e-06, "epoch": 1.055850735203262, 
"percentage": 52.79, "elapsed_time": "7:44:22", "remaining_time": "6:55:15"} +{"current_steps": 2138, "total_steps": 4048, "loss": 0.29909616708755493, "lr": 9.906039573728964e-06, "epoch": 1.0563449894970962, "percentage": 52.82, "elapsed_time": "7:44:35", "remaining_time": "6:55:03"} +{"current_steps": 2139, "total_steps": 4048, "loss": 0.2828724980354309, "lr": 9.897869374605226e-06, "epoch": 1.0568392437909304, "percentage": 52.84, "elapsed_time": "7:44:48", "remaining_time": "6:54:50"} +{"current_steps": 2140, "total_steps": 4048, "loss": 0.26731711626052856, "lr": 9.889699243662433e-06, "epoch": 1.0573334980847646, "percentage": 52.87, "elapsed_time": "7:45:02", "remaining_time": "6:54:37"} +{"current_steps": 2141, "total_steps": 4048, "loss": 0.2912555932998657, "lr": 9.88152918635485e-06, "epoch": 1.0578277523785988, "percentage": 52.89, "elapsed_time": "7:45:15", "remaining_time": "6:54:24"} +{"current_steps": 2142, "total_steps": 4048, "loss": 0.2335313856601715, "lr": 9.873359208136685e-06, "epoch": 1.058322006672433, "percentage": 52.92, "elapsed_time": "7:45:28", "remaining_time": "6:54:11"} +{"current_steps": 2143, "total_steps": 4048, "loss": 0.2716987729072571, "lr": 9.865189314462105e-06, "epoch": 1.0588162609662672, "percentage": 52.94, "elapsed_time": "7:45:41", "remaining_time": "6:53:57"} +{"current_steps": 2144, "total_steps": 4048, "loss": 0.2919968068599701, "lr": 9.857019510785215e-06, "epoch": 1.0593105152601012, "percentage": 52.96, "elapsed_time": "7:45:54", "remaining_time": "6:53:44"} +{"current_steps": 2145, "total_steps": 4048, "loss": 0.26279503107070923, "lr": 9.848849802560057e-06, "epoch": 1.0598047695539354, "percentage": 52.99, "elapsed_time": "7:46:06", "remaining_time": "6:53:31"} +{"current_steps": 2146, "total_steps": 4048, "loss": 0.31622597575187683, "lr": 9.840680195240606e-06, "epoch": 1.0602990238477696, "percentage": 53.01, "elapsed_time": "7:46:19", "remaining_time": "6:53:18"} +{"current_steps": 2147, "total_steps": 
4048, "loss": 0.2399556040763855, "lr": 9.832510694280782e-06, "epoch": 1.0607932781416038, "percentage": 53.04, "elapsed_time": "7:46:32", "remaining_time": "6:53:04"} +{"current_steps": 2148, "total_steps": 4048, "loss": 0.2650333046913147, "lr": 9.824341305134428e-06, "epoch": 1.061287532435438, "percentage": 53.06, "elapsed_time": "7:46:45", "remaining_time": "6:52:51"} +{"current_steps": 2149, "total_steps": 4048, "loss": 0.26629161834716797, "lr": 9.816172033255307e-06, "epoch": 1.0617817867292723, "percentage": 53.09, "elapsed_time": "7:46:57", "remaining_time": "6:52:38"} +{"current_steps": 2150, "total_steps": 4048, "loss": 0.28042545914649963, "lr": 9.808002884097109e-06, "epoch": 1.0622760410231065, "percentage": 53.11, "elapsed_time": "7:47:10", "remaining_time": "6:52:24"} +{"current_steps": 2151, "total_steps": 4048, "loss": 0.24374082684516907, "lr": 9.799833863113445e-06, "epoch": 1.0627702953169407, "percentage": 53.14, "elapsed_time": "7:47:22", "remaining_time": "6:52:11"} +{"current_steps": 2152, "total_steps": 4048, "loss": 0.23013898730278015, "lr": 9.791664975757835e-06, "epoch": 1.0632645496107747, "percentage": 53.16, "elapsed_time": "7:47:35", "remaining_time": "6:51:58"} +{"current_steps": 2153, "total_steps": 4048, "loss": 0.25313276052474976, "lr": 9.783496227483706e-06, "epoch": 1.0637588039046089, "percentage": 53.19, "elapsed_time": "7:47:48", "remaining_time": "6:51:44"} +{"current_steps": 2154, "total_steps": 4048, "loss": 0.2642362713813782, "lr": 9.775327623744403e-06, "epoch": 1.064253058198443, "percentage": 53.21, "elapsed_time": "7:48:00", "remaining_time": "6:51:31"} +{"current_steps": 2155, "total_steps": 4048, "loss": 0.2417108118534088, "lr": 9.76715916999317e-06, "epoch": 1.0647473124922773, "percentage": 53.24, "elapsed_time": "7:48:13", "remaining_time": "6:51:18"} +{"current_steps": 2156, "total_steps": 4048, "loss": 0.25653502345085144, "lr": 9.758990871683148e-06, "epoch": 1.0652415667861115, "percentage": 53.26, 
"elapsed_time": "7:48:26", "remaining_time": "6:51:04"} +{"current_steps": 2157, "total_steps": 4048, "loss": 0.247604638338089, "lr": 9.750822734267378e-06, "epoch": 1.0657358210799457, "percentage": 53.29, "elapsed_time": "7:48:39", "remaining_time": "6:50:51"} +{"current_steps": 2158, "total_steps": 4048, "loss": 0.2675636112689972, "lr": 9.742654763198786e-06, "epoch": 1.06623007537378, "percentage": 53.31, "elapsed_time": "7:48:51", "remaining_time": "6:50:38"} +{"current_steps": 2159, "total_steps": 4048, "loss": 0.2570686340332031, "lr": 9.7344869639302e-06, "epoch": 1.0667243296676139, "percentage": 53.33, "elapsed_time": "7:49:05", "remaining_time": "6:50:25"} +{"current_steps": 2160, "total_steps": 4048, "loss": 0.3046165704727173, "lr": 9.726319341914323e-06, "epoch": 1.067218583961448, "percentage": 53.36, "elapsed_time": "7:49:17", "remaining_time": "6:50:12"} +{"current_steps": 2161, "total_steps": 4048, "loss": 0.24278515577316284, "lr": 9.718151902603744e-06, "epoch": 1.0677128382552823, "percentage": 53.38, "elapsed_time": "7:49:31", "remaining_time": "6:49:59"} +{"current_steps": 2162, "total_steps": 4048, "loss": 0.2565615773200989, "lr": 9.709984651450924e-06, "epoch": 1.0682070925491165, "percentage": 53.41, "elapsed_time": "7:49:44", "remaining_time": "6:49:46"} +{"current_steps": 2163, "total_steps": 4048, "loss": 0.2672972083091736, "lr": 9.701817593908209e-06, "epoch": 1.0687013468429507, "percentage": 53.43, "elapsed_time": "7:49:57", "remaining_time": "6:49:33"} +{"current_steps": 2164, "total_steps": 4048, "loss": 0.21376445889472961, "lr": 9.693650735427808e-06, "epoch": 1.069195601136785, "percentage": 53.46, "elapsed_time": "7:50:10", "remaining_time": "6:49:20"} +{"current_steps": 2165, "total_steps": 4048, "loss": 0.27743393182754517, "lr": 9.685484081461802e-06, "epoch": 1.0696898554306191, "percentage": 53.48, "elapsed_time": "7:50:23", "remaining_time": "6:49:07"} +{"current_steps": 2166, "total_steps": 4048, "loss": 
0.2747134566307068, "lr": 9.677317637462125e-06, "epoch": 1.0701841097244533, "percentage": 53.51, "elapsed_time": "7:50:36", "remaining_time": "6:48:54"} +{"current_steps": 2167, "total_steps": 4048, "loss": 0.2775312066078186, "lr": 9.669151408880581e-06, "epoch": 1.0706783640182873, "percentage": 53.53, "elapsed_time": "7:50:49", "remaining_time": "6:48:41"} +{"current_steps": 2168, "total_steps": 4048, "loss": 0.2743167281150818, "lr": 9.660985401168833e-06, "epoch": 1.0711726183121215, "percentage": 53.56, "elapsed_time": "7:51:03", "remaining_time": "6:48:28"} +{"current_steps": 2169, "total_steps": 4048, "loss": 0.26030686497688293, "lr": 9.652819619778387e-06, "epoch": 1.0716668726059557, "percentage": 53.58, "elapsed_time": "7:51:16", "remaining_time": "6:48:15"} +{"current_steps": 2170, "total_steps": 4048, "loss": 0.32307812571525574, "lr": 9.644654070160603e-06, "epoch": 1.07216112689979, "percentage": 53.61, "elapsed_time": "7:51:29", "remaining_time": "6:48:03"} +{"current_steps": 2171, "total_steps": 4048, "loss": 0.2773011028766632, "lr": 9.63648875776668e-06, "epoch": 1.0726553811936241, "percentage": 53.63, "elapsed_time": "7:51:42", "remaining_time": "6:47:50"} +{"current_steps": 2172, "total_steps": 4048, "loss": 0.27996528148651123, "lr": 9.628323688047672e-06, "epoch": 1.0731496354874583, "percentage": 53.66, "elapsed_time": "7:51:56", "remaining_time": "6:47:37"} +{"current_steps": 2173, "total_steps": 4048, "loss": 0.28022176027297974, "lr": 9.620158866454459e-06, "epoch": 1.0736438897812925, "percentage": 53.68, "elapsed_time": "7:52:09", "remaining_time": "6:47:24"} +{"current_steps": 2174, "total_steps": 4048, "loss": 0.2688876986503601, "lr": 9.61199429843776e-06, "epoch": 1.0741381440751268, "percentage": 53.71, "elapsed_time": "7:52:22", "remaining_time": "6:47:11"} +{"current_steps": 2175, "total_steps": 4048, "loss": 0.23915211856365204, "lr": 9.60382998944812e-06, "epoch": 1.0746323983689607, "percentage": 53.73, "elapsed_time": 
"7:52:35", "remaining_time": "6:46:58"} +{"current_steps": 2176, "total_steps": 4048, "loss": 0.2533806264400482, "lr": 9.59566594493592e-06, "epoch": 1.075126652662795, "percentage": 53.75, "elapsed_time": "7:52:48", "remaining_time": "6:46:45"} +{"current_steps": 2177, "total_steps": 4048, "loss": 0.2887522876262665, "lr": 9.587502170351361e-06, "epoch": 1.0756209069566292, "percentage": 53.78, "elapsed_time": "7:53:01", "remaining_time": "6:46:32"} +{"current_steps": 2178, "total_steps": 4048, "loss": 0.2885408401489258, "lr": 9.579338671144459e-06, "epoch": 1.0761151612504634, "percentage": 53.8, "elapsed_time": "7:53:14", "remaining_time": "6:46:19"} +{"current_steps": 2179, "total_steps": 4048, "loss": 0.25656914710998535, "lr": 9.571175452765045e-06, "epoch": 1.0766094155442976, "percentage": 53.83, "elapsed_time": "7:53:27", "remaining_time": "6:46:06"} +{"current_steps": 2180, "total_steps": 4048, "loss": 0.2935143709182739, "lr": 9.563012520662773e-06, "epoch": 1.0771036698381318, "percentage": 53.85, "elapsed_time": "7:53:40", "remaining_time": "6:45:53"} +{"current_steps": 2181, "total_steps": 4048, "loss": 0.26728200912475586, "lr": 9.554849880287103e-06, "epoch": 1.077597924131966, "percentage": 53.88, "elapsed_time": "7:53:53", "remaining_time": "6:45:40"} +{"current_steps": 2182, "total_steps": 4048, "loss": 0.2558351159095764, "lr": 9.546687537087287e-06, "epoch": 1.0780921784258002, "percentage": 53.9, "elapsed_time": "7:54:07", "remaining_time": "6:45:27"} +{"current_steps": 2183, "total_steps": 4048, "loss": 0.2517240047454834, "lr": 9.538525496512394e-06, "epoch": 1.0785864327196342, "percentage": 53.93, "elapsed_time": "7:54:19", "remaining_time": "6:45:14"} +{"current_steps": 2184, "total_steps": 4048, "loss": 0.23258647322654724, "lr": 9.53036376401129e-06, "epoch": 1.0790806870134684, "percentage": 53.95, "elapsed_time": "7:54:33", "remaining_time": "6:45:01"} +{"current_steps": 2185, "total_steps": 4048, "loss": 0.24100016057491302, "lr": 
9.522202345032627e-06, "epoch": 1.0795749413073026, "percentage": 53.98, "elapsed_time": "7:54:45", "remaining_time": "6:44:47"} +{"current_steps": 2186, "total_steps": 4048, "loss": 0.27807697653770447, "lr": 9.51404124502485e-06, "epoch": 1.0800691956011368, "percentage": 54.0, "elapsed_time": "7:54:58", "remaining_time": "6:44:34"} +{"current_steps": 2187, "total_steps": 4048, "loss": 0.26146867871284485, "lr": 9.50588046943619e-06, "epoch": 1.080563449894971, "percentage": 54.03, "elapsed_time": "7:55:12", "remaining_time": "6:44:22"} +{"current_steps": 2188, "total_steps": 4048, "loss": 0.28570955991744995, "lr": 9.497720023714675e-06, "epoch": 1.0810577041888052, "percentage": 54.05, "elapsed_time": "7:55:25", "remaining_time": "6:44:08"} +{"current_steps": 2189, "total_steps": 4048, "loss": 0.22583246231079102, "lr": 9.489559913308092e-06, "epoch": 1.0815519584826394, "percentage": 54.08, "elapsed_time": "7:55:38", "remaining_time": "6:43:56"} +{"current_steps": 2190, "total_steps": 4048, "loss": 0.27526232600212097, "lr": 9.48140014366402e-06, "epoch": 1.0820462127764734, "percentage": 54.1, "elapsed_time": "7:55:51", "remaining_time": "6:43:42"} +{"current_steps": 2191, "total_steps": 4048, "loss": 0.2777514159679413, "lr": 9.473240720229803e-06, "epoch": 1.0825404670703076, "percentage": 54.13, "elapsed_time": "7:56:04", "remaining_time": "6:43:29"} +{"current_steps": 2192, "total_steps": 4048, "loss": 0.25767001509666443, "lr": 9.465081648452549e-06, "epoch": 1.0830347213641418, "percentage": 54.15, "elapsed_time": "7:56:17", "remaining_time": "6:43:16"} +{"current_steps": 2193, "total_steps": 4048, "loss": 0.24114865064620972, "lr": 9.456922933779148e-06, "epoch": 1.083528975657976, "percentage": 54.17, "elapsed_time": "7:56:30", "remaining_time": "6:43:04"} +{"current_steps": 2194, "total_steps": 4048, "loss": 0.31198200583457947, "lr": 9.448764581656237e-06, "epoch": 1.0840232299518102, "percentage": 54.2, "elapsed_time": "7:56:43", "remaining_time": 
"6:42:50"} +{"current_steps": 2195, "total_steps": 4048, "loss": 0.2724478840827942, "lr": 9.440606597530213e-06, "epoch": 1.0845174842456444, "percentage": 54.22, "elapsed_time": "7:56:56", "remaining_time": "6:42:37"} +{"current_steps": 2196, "total_steps": 4048, "loss": 0.27796900272369385, "lr": 9.432448986847229e-06, "epoch": 1.0850117385394786, "percentage": 54.25, "elapsed_time": "7:57:09", "remaining_time": "6:42:24"} +{"current_steps": 2197, "total_steps": 4048, "loss": 0.2877587676048279, "lr": 9.424291755053198e-06, "epoch": 1.0855059928333128, "percentage": 54.27, "elapsed_time": "7:57:22", "remaining_time": "6:42:11"} +{"current_steps": 2198, "total_steps": 4048, "loss": 0.2898337244987488, "lr": 9.416134907593764e-06, "epoch": 1.0860002471271468, "percentage": 54.3, "elapsed_time": "7:57:35", "remaining_time": "6:41:58"} +{"current_steps": 2199, "total_steps": 4048, "loss": 0.2544672191143036, "lr": 9.407978449914322e-06, "epoch": 1.086494501420981, "percentage": 54.32, "elapsed_time": "7:57:47", "remaining_time": "6:41:45"} +{"current_steps": 2200, "total_steps": 4048, "loss": 0.28336071968078613, "lr": 9.399822387460005e-06, "epoch": 1.0869887557148152, "percentage": 54.35, "elapsed_time": "7:58:01", "remaining_time": "6:41:32"} +{"current_steps": 2201, "total_steps": 4048, "loss": 0.2862734794616699, "lr": 9.391666725675691e-06, "epoch": 1.0874830100086494, "percentage": 54.37, "elapsed_time": "7:58:18", "remaining_time": "6:41:22"} +{"current_steps": 2202, "total_steps": 4048, "loss": 0.26331260800361633, "lr": 9.383511470005978e-06, "epoch": 1.0879772643024836, "percentage": 54.4, "elapsed_time": "7:58:31", "remaining_time": "6:41:09"} +{"current_steps": 2203, "total_steps": 4048, "loss": 0.30087417364120483, "lr": 9.375356625895201e-06, "epoch": 1.0884715185963179, "percentage": 54.42, "elapsed_time": "7:58:44", "remaining_time": "6:40:56"} +{"current_steps": 2204, "total_steps": 4048, "loss": 0.2736594080924988, "lr": 9.36720219878741e-06, 
"epoch": 1.088965772890152, "percentage": 54.45, "elapsed_time": "7:58:57", "remaining_time": "6:40:43"} +{"current_steps": 2205, "total_steps": 4048, "loss": 0.2704418897628784, "lr": 9.359048194126395e-06, "epoch": 1.089460027183986, "percentage": 54.47, "elapsed_time": "7:59:09", "remaining_time": "6:40:29"} +{"current_steps": 2206, "total_steps": 4048, "loss": 0.24540236592292786, "lr": 9.350894617355645e-06, "epoch": 1.0899542814778203, "percentage": 54.5, "elapsed_time": "7:59:22", "remaining_time": "6:40:16"} +{"current_steps": 2207, "total_steps": 4048, "loss": 0.26376527547836304, "lr": 9.342741473918375e-06, "epoch": 1.0904485357716545, "percentage": 54.52, "elapsed_time": "7:59:35", "remaining_time": "6:40:03"} +{"current_steps": 2208, "total_steps": 4048, "loss": 0.24062004685401917, "lr": 9.334588769257502e-06, "epoch": 1.0909427900654887, "percentage": 54.55, "elapsed_time": "7:59:48", "remaining_time": "6:39:50"} +{"current_steps": 2209, "total_steps": 4048, "loss": 0.24209418892860413, "lr": 9.326436508815662e-06, "epoch": 1.0914370443593229, "percentage": 54.57, "elapsed_time": "8:00:01", "remaining_time": "6:39:36"} +{"current_steps": 2210, "total_steps": 4048, "loss": 0.2732285261154175, "lr": 9.318284698035188e-06, "epoch": 1.091931298653157, "percentage": 54.59, "elapsed_time": "8:00:14", "remaining_time": "6:39:24"} +{"current_steps": 2211, "total_steps": 4048, "loss": 0.2684158980846405, "lr": 9.310133342358106e-06, "epoch": 1.0924255529469913, "percentage": 54.62, "elapsed_time": "8:00:26", "remaining_time": "6:39:10"} +{"current_steps": 2212, "total_steps": 4048, "loss": 0.22511601448059082, "lr": 9.301982447226145e-06, "epoch": 1.0929198072408255, "percentage": 54.64, "elapsed_time": "8:00:39", "remaining_time": "6:38:57"} +{"current_steps": 2213, "total_steps": 4048, "loss": 0.2622867226600647, "lr": 9.293832018080731e-06, "epoch": 1.0934140615346597, "percentage": 54.67, "elapsed_time": "8:00:52", "remaining_time": "6:38:44"} 
+{"current_steps": 2214, "total_steps": 4048, "loss": 0.3030891418457031, "lr": 9.285682060362974e-06, "epoch": 1.0939083158284937, "percentage": 54.69, "elapsed_time": "8:01:04", "remaining_time": "6:38:30"} +{"current_steps": 2215, "total_steps": 4048, "loss": 0.24928592145442963, "lr": 9.277532579513666e-06, "epoch": 1.094402570122328, "percentage": 54.72, "elapsed_time": "8:01:17", "remaining_time": "6:38:17"} +{"current_steps": 2216, "total_steps": 4048, "loss": 0.2588339149951935, "lr": 9.269383580973285e-06, "epoch": 1.094896824416162, "percentage": 54.74, "elapsed_time": "8:01:30", "remaining_time": "6:38:03"} +{"current_steps": 2217, "total_steps": 4048, "loss": 0.2587873339653015, "lr": 9.261235070181983e-06, "epoch": 1.0953910787099963, "percentage": 54.77, "elapsed_time": "8:01:43", "remaining_time": "6:37:50"} +{"current_steps": 2218, "total_steps": 4048, "loss": 0.29420971870422363, "lr": 9.253087052579596e-06, "epoch": 1.0958853330038305, "percentage": 54.79, "elapsed_time": "8:01:55", "remaining_time": "6:37:37"} +{"current_steps": 2219, "total_steps": 4048, "loss": 0.25384342670440674, "lr": 9.244939533605619e-06, "epoch": 1.0963795872976647, "percentage": 54.82, "elapsed_time": "8:02:08", "remaining_time": "6:37:24"} +{"current_steps": 2220, "total_steps": 4048, "loss": 0.23133251070976257, "lr": 9.236792518699224e-06, "epoch": 1.096873841591499, "percentage": 54.84, "elapsed_time": "8:02:20", "remaining_time": "6:37:10"} +{"current_steps": 2221, "total_steps": 4048, "loss": 0.26196008920669556, "lr": 9.228646013299233e-06, "epoch": 1.097368095885333, "percentage": 54.87, "elapsed_time": "8:02:33", "remaining_time": "6:36:57"} +{"current_steps": 2222, "total_steps": 4048, "loss": 0.2567690908908844, "lr": 9.220500022844144e-06, "epoch": 1.0978623501791671, "percentage": 54.89, "elapsed_time": "8:02:46", "remaining_time": "6:36:43"} +{"current_steps": 2223, "total_steps": 4048, "loss": 0.2555367350578308, "lr": 9.212354552772107e-06, "epoch": 
1.0983566044730013, "percentage": 54.92, "elapsed_time": "8:02:59", "remaining_time": "6:36:30"} +{"current_steps": 2224, "total_steps": 4048, "loss": 0.24357245862483978, "lr": 9.204209608520913e-06, "epoch": 1.0988508587668355, "percentage": 54.94, "elapsed_time": "8:03:11", "remaining_time": "6:36:17"} +{"current_steps": 2225, "total_steps": 4048, "loss": 0.2792712450027466, "lr": 9.19606519552801e-06, "epoch": 1.0993451130606697, "percentage": 54.97, "elapsed_time": "8:03:24", "remaining_time": "6:36:03"} +{"current_steps": 2226, "total_steps": 4048, "loss": 0.29090794920921326, "lr": 9.1879213192305e-06, "epoch": 1.099839367354504, "percentage": 54.99, "elapsed_time": "8:03:36", "remaining_time": "6:35:50"} +{"current_steps": 2227, "total_steps": 4048, "loss": 0.2777528762817383, "lr": 9.179777985065115e-06, "epoch": 1.1003336216483381, "percentage": 55.01, "elapsed_time": "8:03:49", "remaining_time": "6:35:37"} +{"current_steps": 2228, "total_steps": 4048, "loss": 0.263868123292923, "lr": 9.171635198468227e-06, "epoch": 1.1008278759421724, "percentage": 55.04, "elapsed_time": "8:04:01", "remaining_time": "6:35:23"} +{"current_steps": 2229, "total_steps": 4048, "loss": 0.24118748307228088, "lr": 9.16349296487584e-06, "epoch": 1.1013221302360063, "percentage": 55.06, "elapsed_time": "8:04:14", "remaining_time": "6:35:10"} +{"current_steps": 2230, "total_steps": 4048, "loss": 0.2176896631717682, "lr": 9.155351289723603e-06, "epoch": 1.1018163845298405, "percentage": 55.09, "elapsed_time": "8:04:27", "remaining_time": "6:34:57"} +{"current_steps": 2231, "total_steps": 4048, "loss": 0.24727840721607208, "lr": 9.147210178446776e-06, "epoch": 1.1023106388236747, "percentage": 55.11, "elapsed_time": "8:04:40", "remaining_time": "6:34:43"} +{"current_steps": 2232, "total_steps": 4048, "loss": 0.2711295783519745, "lr": 9.139069636480247e-06, "epoch": 1.102804893117509, "percentage": 55.14, "elapsed_time": "8:04:53", "remaining_time": "6:34:30"} +{"current_steps": 2233, 
"total_steps": 4048, "loss": 0.2987736165523529, "lr": 9.130929669258525e-06, "epoch": 1.1032991474113432, "percentage": 55.16, "elapsed_time": "8:05:05", "remaining_time": "6:34:17"} +{"current_steps": 2234, "total_steps": 4048, "loss": 0.2773835062980652, "lr": 9.122790282215743e-06, "epoch": 1.1037934017051774, "percentage": 55.19, "elapsed_time": "8:05:18", "remaining_time": "6:34:04"} +{"current_steps": 2235, "total_steps": 4048, "loss": 0.29417523741722107, "lr": 9.114651480785632e-06, "epoch": 1.1042876559990116, "percentage": 55.21, "elapsed_time": "8:05:31", "remaining_time": "6:33:51"} +{"current_steps": 2236, "total_steps": 4048, "loss": 0.2642611265182495, "lr": 9.106513270401545e-06, "epoch": 1.1047819102928456, "percentage": 55.24, "elapsed_time": "8:05:44", "remaining_time": "6:33:38"} +{"current_steps": 2237, "total_steps": 4048, "loss": 0.2789427638053894, "lr": 9.098375656496434e-06, "epoch": 1.1052761645866798, "percentage": 55.26, "elapsed_time": "8:05:56", "remaining_time": "6:33:24"} +{"current_steps": 2238, "total_steps": 4048, "loss": 0.3002237379550934, "lr": 9.090238644502845e-06, "epoch": 1.105770418880514, "percentage": 55.29, "elapsed_time": "8:06:10", "remaining_time": "6:33:11"} +{"current_steps": 2239, "total_steps": 4048, "loss": 0.27620676159858704, "lr": 9.082102239852942e-06, "epoch": 1.1062646731743482, "percentage": 55.31, "elapsed_time": "8:06:22", "remaining_time": "6:32:58"} +{"current_steps": 2240, "total_steps": 4048, "loss": 0.26718735694885254, "lr": 9.07396644797847e-06, "epoch": 1.1067589274681824, "percentage": 55.34, "elapsed_time": "8:06:36", "remaining_time": "6:32:45"} +{"current_steps": 2241, "total_steps": 4048, "loss": 0.27443817257881165, "lr": 9.065831274310763e-06, "epoch": 1.1072531817620166, "percentage": 55.36, "elapsed_time": "8:06:48", "remaining_time": "6:32:32"} +{"current_steps": 2242, "total_steps": 4048, "loss": 0.2536284923553467, "lr": 9.057696724280748e-06, "epoch": 1.1077474360558508, 
"percentage": 55.39, "elapsed_time": "8:07:02", "remaining_time": "6:32:19"} +{"current_steps": 2243, "total_steps": 4048, "loss": 0.2583077549934387, "lr": 9.049562803318942e-06, "epoch": 1.108241690349685, "percentage": 55.41, "elapsed_time": "8:07:14", "remaining_time": "6:32:06"} +{"current_steps": 2244, "total_steps": 4048, "loss": 0.2696278393268585, "lr": 9.041429516855427e-06, "epoch": 1.108735944643519, "percentage": 55.43, "elapsed_time": "8:07:28", "remaining_time": "6:31:53"} +{"current_steps": 2245, "total_steps": 4048, "loss": 0.2966364622116089, "lr": 9.033296870319868e-06, "epoch": 1.1092301989373532, "percentage": 55.46, "elapsed_time": "8:07:40", "remaining_time": "6:31:39"} +{"current_steps": 2246, "total_steps": 4048, "loss": 0.22690679132938385, "lr": 9.025164869141503e-06, "epoch": 1.1097244532311874, "percentage": 55.48, "elapsed_time": "8:07:53", "remaining_time": "6:31:26"} +{"current_steps": 2247, "total_steps": 4048, "loss": 0.2777915894985199, "lr": 9.017033518749147e-06, "epoch": 1.1102187075250216, "percentage": 55.51, "elapsed_time": "8:08:06", "remaining_time": "6:31:13"} +{"current_steps": 2248, "total_steps": 4048, "loss": 0.2890303134918213, "lr": 9.008902824571168e-06, "epoch": 1.1107129618188558, "percentage": 55.53, "elapsed_time": "8:08:19", "remaining_time": "6:31:00"} +{"current_steps": 2249, "total_steps": 4048, "loss": 0.22669392824172974, "lr": 9.000772792035505e-06, "epoch": 1.11120721611269, "percentage": 55.56, "elapsed_time": "8:08:32", "remaining_time": "6:30:47"} +{"current_steps": 2250, "total_steps": 4048, "loss": 0.26416563987731934, "lr": 8.992643426569643e-06, "epoch": 1.1117014704065242, "percentage": 55.58, "elapsed_time": "8:08:45", "remaining_time": "6:30:34"} +{"current_steps": 2251, "total_steps": 4048, "loss": 0.2745298147201538, "lr": 8.984514733600641e-06, "epoch": 1.1121957247003584, "percentage": 55.61, "elapsed_time": "8:08:58", "remaining_time": "6:30:21"} +{"current_steps": 2252, "total_steps": 
4048, "loss": 0.31175684928894043, "lr": 8.97638671855509e-06, "epoch": 1.1126899789941924, "percentage": 55.63, "elapsed_time": "8:09:10", "remaining_time": "6:30:07"} +{"current_steps": 2253, "total_steps": 4048, "loss": 0.2632657289505005, "lr": 8.968259386859146e-06, "epoch": 1.1131842332880266, "percentage": 55.66, "elapsed_time": "8:09:23", "remaining_time": "6:29:54"} +{"current_steps": 2254, "total_steps": 4048, "loss": 0.25820252299308777, "lr": 8.960132743938485e-06, "epoch": 1.1136784875818608, "percentage": 55.68, "elapsed_time": "8:09:36", "remaining_time": "6:29:41"} +{"current_steps": 2255, "total_steps": 4048, "loss": 0.24255456030368805, "lr": 8.95200679521835e-06, "epoch": 1.114172741875695, "percentage": 55.71, "elapsed_time": "8:09:49", "remaining_time": "6:29:27"} +{"current_steps": 2256, "total_steps": 4048, "loss": 0.2973442077636719, "lr": 8.943881546123506e-06, "epoch": 1.1146669961695292, "percentage": 55.73, "elapsed_time": "8:10:01", "remaining_time": "6:29:14"} +{"current_steps": 2257, "total_steps": 4048, "loss": 0.23320606350898743, "lr": 8.935757002078252e-06, "epoch": 1.1151612504633635, "percentage": 55.76, "elapsed_time": "8:10:14", "remaining_time": "6:29:01"} +{"current_steps": 2258, "total_steps": 4048, "loss": 0.2923268675804138, "lr": 8.927633168506415e-06, "epoch": 1.1156555047571977, "percentage": 55.78, "elapsed_time": "8:10:27", "remaining_time": "6:28:47"} +{"current_steps": 2259, "total_steps": 4048, "loss": 0.25932425260543823, "lr": 8.91951005083135e-06, "epoch": 1.1161497590510319, "percentage": 55.81, "elapsed_time": "8:10:40", "remaining_time": "6:28:34"} +{"current_steps": 2260, "total_steps": 4048, "loss": 0.2631821036338806, "lr": 8.911387654475943e-06, "epoch": 1.1166440133448658, "percentage": 55.83, "elapsed_time": "8:10:52", "remaining_time": "6:28:21"} +{"current_steps": 2261, "total_steps": 4048, "loss": 0.24741420149803162, "lr": 8.903265984862581e-06, "epoch": 1.1171382676387, "percentage": 55.85, 
"elapsed_time": "8:11:05", "remaining_time": "6:28:08"} +{"current_steps": 2262, "total_steps": 4048, "loss": 0.2593516707420349, "lr": 8.895145047413178e-06, "epoch": 1.1176325219325343, "percentage": 55.88, "elapsed_time": "8:11:18", "remaining_time": "6:27:54"} +{"current_steps": 2263, "total_steps": 4048, "loss": 0.22109609842300415, "lr": 8.88702484754915e-06, "epoch": 1.1181267762263685, "percentage": 55.9, "elapsed_time": "8:11:30", "remaining_time": "6:27:41"} +{"current_steps": 2264, "total_steps": 4048, "loss": 0.24363039433956146, "lr": 8.878905390691437e-06, "epoch": 1.1186210305202027, "percentage": 55.93, "elapsed_time": "8:11:43", "remaining_time": "6:27:28"} +{"current_steps": 2265, "total_steps": 4048, "loss": 0.2507505714893341, "lr": 8.870786682260465e-06, "epoch": 1.1191152848140369, "percentage": 55.95, "elapsed_time": "8:11:55", "remaining_time": "6:27:14"} +{"current_steps": 2266, "total_steps": 4048, "loss": 0.303046315908432, "lr": 8.86266872767617e-06, "epoch": 1.119609539107871, "percentage": 55.98, "elapsed_time": "8:12:08", "remaining_time": "6:27:01"} +{"current_steps": 2267, "total_steps": 4048, "loss": 0.257943332195282, "lr": 8.854551532357977e-06, "epoch": 1.120103793401705, "percentage": 56.0, "elapsed_time": "8:12:21", "remaining_time": "6:26:48"} +{"current_steps": 2268, "total_steps": 4048, "loss": 0.2697421610355377, "lr": 8.84643510172482e-06, "epoch": 1.1205980476955393, "percentage": 56.03, "elapsed_time": "8:12:34", "remaining_time": "6:26:35"} +{"current_steps": 2269, "total_steps": 4048, "loss": 0.20090234279632568, "lr": 8.838319441195105e-06, "epoch": 1.1210923019893735, "percentage": 56.05, "elapsed_time": "8:12:47", "remaining_time": "6:26:22"} +{"current_steps": 2270, "total_steps": 4048, "loss": 0.2714189887046814, "lr": 8.830204556186736e-06, "epoch": 1.1215865562832077, "percentage": 56.08, "elapsed_time": "8:13:00", "remaining_time": "6:26:09"} +{"current_steps": 2271, "total_steps": 4048, "loss": 
0.23497477173805237, "lr": 8.822090452117084e-06, "epoch": 1.122080810577042, "percentage": 56.1, "elapsed_time": "8:13:12", "remaining_time": "6:25:55"} +{"current_steps": 2272, "total_steps": 4048, "loss": 0.2582445740699768, "lr": 8.81397713440302e-06, "epoch": 1.122575064870876, "percentage": 56.13, "elapsed_time": "8:13:26", "remaining_time": "6:25:42"} +{"current_steps": 2273, "total_steps": 4048, "loss": 0.26494619250297546, "lr": 8.805864608460876e-06, "epoch": 1.1230693191647103, "percentage": 56.15, "elapsed_time": "8:13:38", "remaining_time": "6:25:29"} +{"current_steps": 2274, "total_steps": 4048, "loss": 0.2767868936061859, "lr": 8.797752879706455e-06, "epoch": 1.1235635734585445, "percentage": 56.18, "elapsed_time": "8:13:51", "remaining_time": "6:25:16"} +{"current_steps": 2275, "total_steps": 4048, "loss": 0.27696311473846436, "lr": 8.789641953555032e-06, "epoch": 1.1240578277523785, "percentage": 56.2, "elapsed_time": "8:14:04", "remaining_time": "6:25:03"} +{"current_steps": 2276, "total_steps": 4048, "loss": 0.27048689126968384, "lr": 8.78153183542135e-06, "epoch": 1.1245520820462127, "percentage": 56.23, "elapsed_time": "8:14:16", "remaining_time": "6:24:49"} +{"current_steps": 2277, "total_steps": 4048, "loss": 0.2940211892127991, "lr": 8.773422530719606e-06, "epoch": 1.125046336340047, "percentage": 56.25, "elapsed_time": "8:14:29", "remaining_time": "6:24:36"} +{"current_steps": 2278, "total_steps": 4048, "loss": 0.24859851598739624, "lr": 8.765314044863453e-06, "epoch": 1.1255405906338811, "percentage": 56.27, "elapsed_time": "8:14:42", "remaining_time": "6:24:23"} +{"current_steps": 2279, "total_steps": 4048, "loss": 0.28879350423812866, "lr": 8.757206383265998e-06, "epoch": 1.1260348449277153, "percentage": 56.3, "elapsed_time": "8:14:55", "remaining_time": "6:24:10"} +{"current_steps": 2280, "total_steps": 4048, "loss": 0.24804209172725677, "lr": 8.74909955133981e-06, "epoch": 1.1265290992215495, "percentage": 56.32, "elapsed_time": 
"8:15:08", "remaining_time": "6:23:56"} +{"current_steps": 2281, "total_steps": 4048, "loss": 0.3199496567249298, "lr": 8.740993554496886e-06, "epoch": 1.1270233535153837, "percentage": 56.35, "elapsed_time": "8:15:21", "remaining_time": "6:23:43"} +{"current_steps": 2282, "total_steps": 4048, "loss": 0.3098929524421692, "lr": 8.732888398148678e-06, "epoch": 1.1275176078092177, "percentage": 56.37, "elapsed_time": "8:15:33", "remaining_time": "6:23:30"} +{"current_steps": 2283, "total_steps": 4048, "loss": 0.21280749142169952, "lr": 8.724784087706067e-06, "epoch": 1.128011862103052, "percentage": 56.4, "elapsed_time": "8:15:46", "remaining_time": "6:23:17"} +{"current_steps": 2284, "total_steps": 4048, "loss": 0.25330856442451477, "lr": 8.716680628579382e-06, "epoch": 1.1285061163968861, "percentage": 56.42, "elapsed_time": "8:15:59", "remaining_time": "6:23:04"} +{"current_steps": 2285, "total_steps": 4048, "loss": 0.26141977310180664, "lr": 8.708578026178371e-06, "epoch": 1.1290003706907203, "percentage": 56.45, "elapsed_time": "8:16:12", "remaining_time": "6:22:50"} +{"current_steps": 2286, "total_steps": 4048, "loss": 0.2529010772705078, "lr": 8.700476285912219e-06, "epoch": 1.1294946249845546, "percentage": 56.47, "elapsed_time": "8:16:24", "remaining_time": "6:22:37"} +{"current_steps": 2287, "total_steps": 4048, "loss": 0.2662504315376282, "lr": 8.69237541318953e-06, "epoch": 1.1299888792783888, "percentage": 56.5, "elapsed_time": "8:16:37", "remaining_time": "6:22:24"} +{"current_steps": 2288, "total_steps": 4048, "loss": 0.2724575996398926, "lr": 8.684275413418329e-06, "epoch": 1.130483133572223, "percentage": 56.52, "elapsed_time": "8:16:50", "remaining_time": "6:22:11"} +{"current_steps": 2289, "total_steps": 4048, "loss": 0.2820962965488434, "lr": 8.676176292006065e-06, "epoch": 1.1309773878660572, "percentage": 56.55, "elapsed_time": "8:17:03", "remaining_time": "6:21:58"} +{"current_steps": 2290, "total_steps": 4048, "loss": 0.2594743072986603, "lr": 
8.668078054359595e-06, "epoch": 1.1314716421598914, "percentage": 56.57, "elapsed_time": "8:17:16", "remaining_time": "6:21:44"} +{"current_steps": 2291, "total_steps": 4048, "loss": 0.25397709012031555, "lr": 8.659980705885183e-06, "epoch": 1.1319658964537254, "percentage": 56.6, "elapsed_time": "8:17:29", "remaining_time": "6:21:32"} +{"current_steps": 2292, "total_steps": 4048, "loss": 0.27261337637901306, "lr": 8.651884251988503e-06, "epoch": 1.1324601507475596, "percentage": 56.62, "elapsed_time": "8:17:42", "remaining_time": "6:21:18"} +{"current_steps": 2293, "total_steps": 4048, "loss": 0.2726992070674896, "lr": 8.643788698074638e-06, "epoch": 1.1329544050413938, "percentage": 56.65, "elapsed_time": "8:17:55", "remaining_time": "6:21:05"} +{"current_steps": 2294, "total_steps": 4048, "loss": 0.2792774438858032, "lr": 8.635694049548058e-06, "epoch": 1.133448659335228, "percentage": 56.67, "elapsed_time": "8:18:07", "remaining_time": "6:20:52"} +{"current_steps": 2295, "total_steps": 4048, "loss": 0.310885488986969, "lr": 8.627600311812638e-06, "epoch": 1.1339429136290622, "percentage": 56.69, "elapsed_time": "8:18:20", "remaining_time": "6:20:38"} +{"current_steps": 2296, "total_steps": 4048, "loss": 0.27060413360595703, "lr": 8.619507490271638e-06, "epoch": 1.1344371679228964, "percentage": 56.72, "elapsed_time": "8:18:33", "remaining_time": "6:20:25"} +{"current_steps": 2297, "total_steps": 4048, "loss": 0.27069440484046936, "lr": 8.611415590327718e-06, "epoch": 1.1349314222167306, "percentage": 56.74, "elapsed_time": "8:18:45", "remaining_time": "6:20:12"} +{"current_steps": 2298, "total_steps": 4048, "loss": 0.2790459990501404, "lr": 8.603324617382905e-06, "epoch": 1.1354256765105646, "percentage": 56.77, "elapsed_time": "8:18:58", "remaining_time": "6:19:59"} +{"current_steps": 2299, "total_steps": 4048, "loss": 0.27170947194099426, "lr": 8.595234576838624e-06, "epoch": 1.1359199308043988, "percentage": 56.79, "elapsed_time": "8:19:11", 
"remaining_time": "6:19:46"} +{"current_steps": 2300, "total_steps": 4048, "loss": 0.25313863158226013, "lr": 8.587145474095665e-06, "epoch": 1.136414185098233, "percentage": 56.82, "elapsed_time": "8:19:24", "remaining_time": "6:19:33"} +{"current_steps": 2301, "total_steps": 4048, "loss": 0.289467990398407, "lr": 8.5790573145542e-06, "epoch": 1.1369084393920672, "percentage": 56.84, "elapsed_time": "8:19:43", "remaining_time": "6:19:24"} +{"current_steps": 2302, "total_steps": 4048, "loss": 0.29796460270881653, "lr": 8.570970103613774e-06, "epoch": 1.1374026936859014, "percentage": 56.87, "elapsed_time": "8:19:56", "remaining_time": "6:19:11"} +{"current_steps": 2303, "total_steps": 4048, "loss": 0.27264270186424255, "lr": 8.562883846673286e-06, "epoch": 1.1378969479797356, "percentage": 56.89, "elapsed_time": "8:20:09", "remaining_time": "6:18:58"} +{"current_steps": 2304, "total_steps": 4048, "loss": 0.3099757134914398, "lr": 8.554798549131005e-06, "epoch": 1.1383912022735698, "percentage": 56.92, "elapsed_time": "8:20:23", "remaining_time": "6:18:45"} +{"current_steps": 2305, "total_steps": 4048, "loss": 0.30002498626708984, "lr": 8.546714216384565e-06, "epoch": 1.138885456567404, "percentage": 56.94, "elapsed_time": "8:20:36", "remaining_time": "6:18:32"} +{"current_steps": 2306, "total_steps": 4048, "loss": 0.2428818643093109, "lr": 8.538630853830951e-06, "epoch": 1.139379710861238, "percentage": 56.97, "elapsed_time": "8:20:49", "remaining_time": "6:18:20"} +{"current_steps": 2307, "total_steps": 4048, "loss": 0.2601294219493866, "lr": 8.530548466866497e-06, "epoch": 1.1398739651550722, "percentage": 56.99, "elapsed_time": "8:21:02", "remaining_time": "6:18:07"} +{"current_steps": 2308, "total_steps": 4048, "loss": 0.23878628015518188, "lr": 8.522467060886888e-06, "epoch": 1.1403682194489064, "percentage": 57.02, "elapsed_time": "8:21:15", "remaining_time": "6:17:53"} +{"current_steps": 2309, "total_steps": 4048, "loss": 0.2780643403530121, "lr": 
8.514386641287163e-06, "epoch": 1.1408624737427406, "percentage": 57.04, "elapsed_time": "8:21:28", "remaining_time": "6:17:41"} +{"current_steps": 2310, "total_steps": 4048, "loss": 0.29834824800491333, "lr": 8.506307213461689e-06, "epoch": 1.1413567280365748, "percentage": 57.07, "elapsed_time": "8:21:41", "remaining_time": "6:17:28"} +{"current_steps": 2311, "total_steps": 4048, "loss": 0.2733996510505676, "lr": 8.498228782804175e-06, "epoch": 1.141850982330409, "percentage": 57.09, "elapsed_time": "8:21:55", "remaining_time": "6:17:15"} +{"current_steps": 2312, "total_steps": 4048, "loss": 0.2524843215942383, "lr": 8.490151354707669e-06, "epoch": 1.1423452366242433, "percentage": 57.11, "elapsed_time": "8:22:08", "remaining_time": "6:17:02"} +{"current_steps": 2313, "total_steps": 4048, "loss": 0.29077857732772827, "lr": 8.482074934564543e-06, "epoch": 1.1428394909180772, "percentage": 57.14, "elapsed_time": "8:22:21", "remaining_time": "6:16:49"} +{"current_steps": 2314, "total_steps": 4048, "loss": 0.25935155153274536, "lr": 8.473999527766503e-06, "epoch": 1.1433337452119114, "percentage": 57.16, "elapsed_time": "8:22:34", "remaining_time": "6:16:36"} +{"current_steps": 2315, "total_steps": 4048, "loss": 0.23595012724399567, "lr": 8.465925139704578e-06, "epoch": 1.1438279995057457, "percentage": 57.19, "elapsed_time": "8:22:48", "remaining_time": "6:16:24"} +{"current_steps": 2316, "total_steps": 4048, "loss": 0.25193360447883606, "lr": 8.457851775769108e-06, "epoch": 1.1443222537995799, "percentage": 57.21, "elapsed_time": "8:23:02", "remaining_time": "6:16:11"} +{"current_steps": 2317, "total_steps": 4048, "loss": 0.26844412088394165, "lr": 8.449779441349755e-06, "epoch": 1.144816508093414, "percentage": 57.24, "elapsed_time": "8:23:15", "remaining_time": "6:15:58"} +{"current_steps": 2318, "total_steps": 4048, "loss": 0.2507320046424866, "lr": 8.441708141835499e-06, "epoch": 1.1453107623872483, "percentage": 57.26, "elapsed_time": "8:23:29", 
"remaining_time": "6:15:45"} +{"current_steps": 2319, "total_steps": 4048, "loss": 0.2756047248840332, "lr": 8.433637882614624e-06, "epoch": 1.1458050166810825, "percentage": 57.29, "elapsed_time": "8:23:42", "remaining_time": "6:15:33"} +{"current_steps": 2320, "total_steps": 4048, "loss": 0.3136482536792755, "lr": 8.425568669074717e-06, "epoch": 1.1462992709749167, "percentage": 57.31, "elapsed_time": "8:23:55", "remaining_time": "6:15:20"} +{"current_steps": 2321, "total_steps": 4048, "loss": 0.25975438952445984, "lr": 8.417500506602668e-06, "epoch": 1.146793525268751, "percentage": 57.34, "elapsed_time": "8:24:08", "remaining_time": "6:15:07"} +{"current_steps": 2322, "total_steps": 4048, "loss": 0.2524915039539337, "lr": 8.409433400584674e-06, "epoch": 1.1472877795625849, "percentage": 57.36, "elapsed_time": "8:24:21", "remaining_time": "6:14:54"} +{"current_steps": 2323, "total_steps": 4048, "loss": 0.2731180787086487, "lr": 8.401367356406214e-06, "epoch": 1.147782033856419, "percentage": 57.39, "elapsed_time": "8:24:35", "remaining_time": "6:14:41"} +{"current_steps": 2324, "total_steps": 4048, "loss": 0.27752095460891724, "lr": 8.393302379452065e-06, "epoch": 1.1482762881502533, "percentage": 57.41, "elapsed_time": "8:24:48", "remaining_time": "6:14:28"} +{"current_steps": 2325, "total_steps": 4048, "loss": 0.269240140914917, "lr": 8.385238475106287e-06, "epoch": 1.1487705424440875, "percentage": 57.44, "elapsed_time": "8:25:01", "remaining_time": "6:14:15"} +{"current_steps": 2326, "total_steps": 4048, "loss": 0.2668418288230896, "lr": 8.377175648752236e-06, "epoch": 1.1492647967379217, "percentage": 57.46, "elapsed_time": "8:25:14", "remaining_time": "6:14:02"} +{"current_steps": 2327, "total_steps": 4048, "loss": 0.29276758432388306, "lr": 8.369113905772532e-06, "epoch": 1.149759051031756, "percentage": 57.49, "elapsed_time": "8:25:27", "remaining_time": "6:13:49"} +{"current_steps": 2328, "total_steps": 4048, "loss": 0.26562872529029846, "lr": 
8.361053251549083e-06, "epoch": 1.15025330532559, "percentage": 57.51, "elapsed_time": "8:25:41", "remaining_time": "6:13:37"} +{"current_steps": 2329, "total_steps": 4048, "loss": 0.257779061794281, "lr": 8.352993691463063e-06, "epoch": 1.150747559619424, "percentage": 57.53, "elapsed_time": "8:25:54", "remaining_time": "6:13:24"} +{"current_steps": 2330, "total_steps": 4048, "loss": 0.2871868312358856, "lr": 8.344935230894926e-06, "epoch": 1.1512418139132583, "percentage": 57.56, "elapsed_time": "8:26:07", "remaining_time": "6:13:11"} +{"current_steps": 2331, "total_steps": 4048, "loss": 0.25191348791122437, "lr": 8.336877875224379e-06, "epoch": 1.1517360682070925, "percentage": 57.58, "elapsed_time": "8:26:21", "remaining_time": "6:12:58"} +{"current_steps": 2332, "total_steps": 4048, "loss": 0.27057239413261414, "lr": 8.3288216298304e-06, "epoch": 1.1522303225009267, "percentage": 57.61, "elapsed_time": "8:26:34", "remaining_time": "6:12:45"} +{"current_steps": 2333, "total_steps": 4048, "loss": 0.31574326753616333, "lr": 8.32076650009122e-06, "epoch": 1.152724576794761, "percentage": 57.63, "elapsed_time": "8:26:47", "remaining_time": "6:12:32"} +{"current_steps": 2334, "total_steps": 4048, "loss": 0.22503693401813507, "lr": 8.312712491384332e-06, "epoch": 1.1532188310885951, "percentage": 57.66, "elapsed_time": "8:27:00", "remaining_time": "6:12:19"} +{"current_steps": 2335, "total_steps": 4048, "loss": 0.25754863023757935, "lr": 8.304659609086478e-06, "epoch": 1.1537130853824293, "percentage": 57.68, "elapsed_time": "8:27:13", "remaining_time": "6:12:06"} +{"current_steps": 2336, "total_steps": 4048, "loss": 0.24367934465408325, "lr": 8.296607858573646e-06, "epoch": 1.1542073396762635, "percentage": 57.71, "elapsed_time": "8:27:27", "remaining_time": "6:11:54"} +{"current_steps": 2337, "total_steps": 4048, "loss": 0.28907084465026855, "lr": 8.288557245221068e-06, "epoch": 1.1547015939700975, "percentage": 57.73, "elapsed_time": "8:27:40", "remaining_time": 
"6:11:41"} +{"current_steps": 2338, "total_steps": 4048, "loss": 0.24526283144950867, "lr": 8.280507774403217e-06, "epoch": 1.1551958482639317, "percentage": 57.76, "elapsed_time": "8:27:53", "remaining_time": "6:11:28"} +{"current_steps": 2339, "total_steps": 4048, "loss": 0.21968787908554077, "lr": 8.272459451493811e-06, "epoch": 1.155690102557766, "percentage": 57.78, "elapsed_time": "8:28:07", "remaining_time": "6:11:15"} +{"current_steps": 2340, "total_steps": 4048, "loss": 0.23803061246871948, "lr": 8.264412281865791e-06, "epoch": 1.1561843568516001, "percentage": 57.81, "elapsed_time": "8:28:20", "remaining_time": "6:11:02"} +{"current_steps": 2341, "total_steps": 4048, "loss": 0.25715917348861694, "lr": 8.256366270891335e-06, "epoch": 1.1566786111454344, "percentage": 57.83, "elapsed_time": "8:28:34", "remaining_time": "6:10:50"} +{"current_steps": 2342, "total_steps": 4048, "loss": 0.29443520307540894, "lr": 8.248321423941836e-06, "epoch": 1.1571728654392686, "percentage": 57.86, "elapsed_time": "8:28:47", "remaining_time": "6:10:37"} +{"current_steps": 2343, "total_steps": 4048, "loss": 0.24904949963092804, "lr": 8.240277746387934e-06, "epoch": 1.1576671197331028, "percentage": 57.88, "elapsed_time": "8:29:00", "remaining_time": "6:10:24"} +{"current_steps": 2344, "total_steps": 4048, "loss": 0.2594628632068634, "lr": 8.23223524359946e-06, "epoch": 1.1581613740269368, "percentage": 57.91, "elapsed_time": "8:29:13", "remaining_time": "6:10:11"} +{"current_steps": 2345, "total_steps": 4048, "loss": 0.23853302001953125, "lr": 8.224193920945482e-06, "epoch": 1.158655628320771, "percentage": 57.93, "elapsed_time": "8:29:26", "remaining_time": "6:09:58"} +{"current_steps": 2346, "total_steps": 4048, "loss": 0.25465112924575806, "lr": 8.216153783794266e-06, "epoch": 1.1591498826146052, "percentage": 57.95, "elapsed_time": "8:29:39", "remaining_time": "6:09:45"} +{"current_steps": 2347, "total_steps": 4048, "loss": 0.28038230538368225, "lr": 
8.208114837513297e-06, "epoch": 1.1596441369084394, "percentage": 57.98, "elapsed_time": "8:29:53", "remaining_time": "6:09:32"} +{"current_steps": 2348, "total_steps": 4048, "loss": 0.3144591450691223, "lr": 8.200077087469262e-06, "epoch": 1.1601383912022736, "percentage": 58.0, "elapsed_time": "8:30:06", "remaining_time": "6:09:19"} +{"current_steps": 2349, "total_steps": 4048, "loss": 0.25782787799835205, "lr": 8.192040539028047e-06, "epoch": 1.1606326454961078, "percentage": 58.03, "elapsed_time": "8:30:19", "remaining_time": "6:09:06"} +{"current_steps": 2350, "total_steps": 4048, "loss": 0.21928566694259644, "lr": 8.18400519755473e-06, "epoch": 1.161126899789942, "percentage": 58.05, "elapsed_time": "8:30:32", "remaining_time": "6:08:53"} +{"current_steps": 2351, "total_steps": 4048, "loss": 0.2277221381664276, "lr": 8.175971068413598e-06, "epoch": 1.1616211540837762, "percentage": 58.08, "elapsed_time": "8:30:45", "remaining_time": "6:08:40"} +{"current_steps": 2352, "total_steps": 4048, "loss": 0.26971378922462463, "lr": 8.16793815696812e-06, "epoch": 1.1621154083776104, "percentage": 58.1, "elapsed_time": "8:30:59", "remaining_time": "6:08:27"} +{"current_steps": 2353, "total_steps": 4048, "loss": 0.26448535919189453, "lr": 8.15990646858095e-06, "epoch": 1.1626096626714444, "percentage": 58.13, "elapsed_time": "8:31:12", "remaining_time": "6:08:14"} +{"current_steps": 2354, "total_steps": 4048, "loss": 0.26372095942497253, "lr": 8.151876008613927e-06, "epoch": 1.1631039169652786, "percentage": 58.15, "elapsed_time": "8:31:25", "remaining_time": "6:08:02"} +{"current_steps": 2355, "total_steps": 4048, "loss": 0.2594243288040161, "lr": 8.143846782428078e-06, "epoch": 1.1635981712591128, "percentage": 58.18, "elapsed_time": "8:31:38", "remaining_time": "6:07:49"} +{"current_steps": 2356, "total_steps": 4048, "loss": 0.23994986712932587, "lr": 8.135818795383597e-06, "epoch": 1.164092425552947, "percentage": 58.2, "elapsed_time": "8:31:52", "remaining_time": 
"6:07:36"} +{"current_steps": 2357, "total_steps": 4048, "loss": 0.2746032476425171, "lr": 8.12779205283985e-06, "epoch": 1.1645866798467812, "percentage": 58.23, "elapsed_time": "8:32:05", "remaining_time": "6:07:23"} +{"current_steps": 2358, "total_steps": 4048, "loss": 0.3323846161365509, "lr": 8.119766560155377e-06, "epoch": 1.1650809341406154, "percentage": 58.25, "elapsed_time": "8:32:18", "remaining_time": "6:07:10"} +{"current_steps": 2359, "total_steps": 4048, "loss": 0.28155508637428284, "lr": 8.111742322687886e-06, "epoch": 1.1655751884344494, "percentage": 58.28, "elapsed_time": "8:32:31", "remaining_time": "6:06:57"} +{"current_steps": 2360, "total_steps": 4048, "loss": 0.2936748266220093, "lr": 8.103719345794237e-06, "epoch": 1.1660694427282836, "percentage": 58.3, "elapsed_time": "8:32:44", "remaining_time": "6:06:44"} +{"current_steps": 2361, "total_steps": 4048, "loss": 0.23575282096862793, "lr": 8.095697634830463e-06, "epoch": 1.1665636970221178, "percentage": 58.33, "elapsed_time": "8:32:57", "remaining_time": "6:06:31"} +{"current_steps": 2362, "total_steps": 4048, "loss": 0.24547496438026428, "lr": 8.087677195151737e-06, "epoch": 1.167057951315952, "percentage": 58.35, "elapsed_time": "8:33:11", "remaining_time": "6:06:18"} +{"current_steps": 2363, "total_steps": 4048, "loss": 0.2936372458934784, "lr": 8.079658032112388e-06, "epoch": 1.1675522056097862, "percentage": 58.37, "elapsed_time": "8:33:24", "remaining_time": "6:06:05"} +{"current_steps": 2364, "total_steps": 4048, "loss": 0.28602418303489685, "lr": 8.071640151065902e-06, "epoch": 1.1680464599036204, "percentage": 58.4, "elapsed_time": "8:33:37", "remaining_time": "6:05:53"} +{"current_steps": 2365, "total_steps": 4048, "loss": 0.2742761969566345, "lr": 8.0636235573649e-06, "epoch": 1.1685407141974546, "percentage": 58.42, "elapsed_time": "8:33:51", "remaining_time": "6:05:40"} +{"current_steps": 2366, "total_steps": 4048, "loss": 0.2590268552303314, "lr": 8.05560825636114e-06, 
"epoch": 1.1690349684912889, "percentage": 58.45, "elapsed_time": "8:34:04", "remaining_time": "6:05:27"} +{"current_steps": 2367, "total_steps": 4048, "loss": 0.26881399750709534, "lr": 8.047594253405525e-06, "epoch": 1.169529222785123, "percentage": 58.47, "elapsed_time": "8:34:17", "remaining_time": "6:05:14"} +{"current_steps": 2368, "total_steps": 4048, "loss": 0.27069953083992004, "lr": 8.039581553848093e-06, "epoch": 1.170023477078957, "percentage": 58.5, "elapsed_time": "8:34:30", "remaining_time": "6:05:01"} +{"current_steps": 2369, "total_steps": 4048, "loss": 0.27320611476898193, "lr": 8.031570163038005e-06, "epoch": 1.1705177313727912, "percentage": 58.52, "elapsed_time": "8:34:44", "remaining_time": "6:04:48"} +{"current_steps": 2370, "total_steps": 4048, "loss": 0.26400327682495117, "lr": 8.023560086323548e-06, "epoch": 1.1710119856666255, "percentage": 58.55, "elapsed_time": "8:34:57", "remaining_time": "6:04:35"} +{"current_steps": 2371, "total_steps": 4048, "loss": 0.22287744283676147, "lr": 8.015551329052136e-06, "epoch": 1.1715062399604597, "percentage": 58.57, "elapsed_time": "8:35:10", "remaining_time": "6:04:22"} +{"current_steps": 2372, "total_steps": 4048, "loss": 0.28240424394607544, "lr": 8.007543896570309e-06, "epoch": 1.1720004942542939, "percentage": 58.6, "elapsed_time": "8:35:23", "remaining_time": "6:04:09"} +{"current_steps": 2373, "total_steps": 4048, "loss": 0.27119147777557373, "lr": 7.999537794223702e-06, "epoch": 1.172494748548128, "percentage": 58.62, "elapsed_time": "8:35:36", "remaining_time": "6:03:56"} +{"current_steps": 2374, "total_steps": 4048, "loss": 0.2579900920391083, "lr": 7.991533027357085e-06, "epoch": 1.1729890028419623, "percentage": 58.65, "elapsed_time": "8:35:49", "remaining_time": "6:03:43"} +{"current_steps": 2375, "total_steps": 4048, "loss": 0.25550374388694763, "lr": 7.983529601314317e-06, "epoch": 1.1734832571357963, "percentage": 58.67, "elapsed_time": "8:36:03", "remaining_time": "6:03:31"} 
+{"current_steps": 2376, "total_steps": 4048, "loss": 0.21197429299354553, "lr": 7.97552752143838e-06, "epoch": 1.1739775114296305, "percentage": 58.7, "elapsed_time": "8:36:16", "remaining_time": "6:03:18"} +{"current_steps": 2377, "total_steps": 4048, "loss": 0.28724029660224915, "lr": 7.96752679307134e-06, "epoch": 1.1744717657234647, "percentage": 58.72, "elapsed_time": "8:36:29", "remaining_time": "6:03:05"} +{"current_steps": 2378, "total_steps": 4048, "loss": 0.24320468306541443, "lr": 7.959527421554375e-06, "epoch": 1.1749660200172989, "percentage": 58.75, "elapsed_time": "8:36:42", "remaining_time": "6:02:52"} +{"current_steps": 2379, "total_steps": 4048, "loss": 0.22487501800060272, "lr": 7.951529412227745e-06, "epoch": 1.175460274311133, "percentage": 58.77, "elapsed_time": "8:36:55", "remaining_time": "6:02:38"} +{"current_steps": 2380, "total_steps": 4048, "loss": 0.2754969894886017, "lr": 7.943532770430811e-06, "epoch": 1.1759545286049673, "percentage": 58.79, "elapsed_time": "8:37:08", "remaining_time": "6:02:26"} +{"current_steps": 2381, "total_steps": 4048, "loss": 0.2734825909137726, "lr": 7.93553750150202e-06, "epoch": 1.1764487828988015, "percentage": 58.82, "elapsed_time": "8:37:21", "remaining_time": "6:02:13"} +{"current_steps": 2382, "total_steps": 4048, "loss": 0.2803332209587097, "lr": 7.927543610778895e-06, "epoch": 1.1769430371926357, "percentage": 58.84, "elapsed_time": "8:37:34", "remaining_time": "6:02:00"} +{"current_steps": 2383, "total_steps": 4048, "loss": 0.2820316255092621, "lr": 7.919551103598037e-06, "epoch": 1.1774372914864697, "percentage": 58.87, "elapsed_time": "8:37:47", "remaining_time": "6:01:46"} +{"current_steps": 2384, "total_steps": 4048, "loss": 0.26788315176963806, "lr": 7.911559985295142e-06, "epoch": 1.177931545780304, "percentage": 58.89, "elapsed_time": "8:38:00", "remaining_time": "6:01:34"} +{"current_steps": 2385, "total_steps": 4048, "loss": 0.2562825083732605, "lr": 7.90357026120496e-06, "epoch": 
1.178425800074138, "percentage": 58.92, "elapsed_time": "8:38:14", "remaining_time": "6:01:21"} +{"current_steps": 2386, "total_steps": 4048, "loss": 0.28260675072669983, "lr": 7.895581936661316e-06, "epoch": 1.1789200543679723, "percentage": 58.94, "elapsed_time": "8:38:27", "remaining_time": "6:01:08"} +{"current_steps": 2387, "total_steps": 4048, "loss": 0.25887200236320496, "lr": 7.887595016997105e-06, "epoch": 1.1794143086618065, "percentage": 58.97, "elapsed_time": "8:38:40", "remaining_time": "6:00:55"} +{"current_steps": 2388, "total_steps": 4048, "loss": 0.2351648062467575, "lr": 7.879609507544274e-06, "epoch": 1.1799085629556407, "percentage": 58.99, "elapsed_time": "8:38:53", "remaining_time": "6:00:42"} +{"current_steps": 2389, "total_steps": 4048, "loss": 0.2958889305591583, "lr": 7.871625413633843e-06, "epoch": 1.180402817249475, "percentage": 59.02, "elapsed_time": "8:39:06", "remaining_time": "6:00:29"} +{"current_steps": 2390, "total_steps": 4048, "loss": 0.29704710841178894, "lr": 7.863642740595873e-06, "epoch": 1.180897071543309, "percentage": 59.04, "elapsed_time": "8:39:20", "remaining_time": "6:00:16"} +{"current_steps": 2391, "total_steps": 4048, "loss": 0.23283210396766663, "lr": 7.855661493759488e-06, "epoch": 1.1813913258371431, "percentage": 59.07, "elapsed_time": "8:39:33", "remaining_time": "6:00:03"} +{"current_steps": 2392, "total_steps": 4048, "loss": 0.22818870842456818, "lr": 7.847681678452846e-06, "epoch": 1.1818855801309773, "percentage": 59.09, "elapsed_time": "8:39:46", "remaining_time": "5:59:50"} +{"current_steps": 2393, "total_steps": 4048, "loss": 0.2345077246427536, "lr": 7.839703300003163e-06, "epoch": 1.1823798344248115, "percentage": 59.12, "elapsed_time": "8:39:59", "remaining_time": "5:59:37"} +{"current_steps": 2394, "total_steps": 4048, "loss": 0.31161409616470337, "lr": 7.831726363736694e-06, "epoch": 1.1828740887186457, "percentage": 59.14, "elapsed_time": "8:40:12", "remaining_time": "5:59:24"} +{"current_steps": 
2395, "total_steps": 4048, "loss": 0.2958439588546753, "lr": 7.823750874978724e-06, "epoch": 1.18336834301248, "percentage": 59.17, "elapsed_time": "8:40:25", "remaining_time": "5:59:11"} +{"current_steps": 2396, "total_steps": 4048, "loss": 0.24895446002483368, "lr": 7.815776839053568e-06, "epoch": 1.1838625973063142, "percentage": 59.19, "elapsed_time": "8:40:39", "remaining_time": "5:58:59"} +{"current_steps": 2397, "total_steps": 4048, "loss": 0.2691795825958252, "lr": 7.807804261284591e-06, "epoch": 1.1843568516001484, "percentage": 59.21, "elapsed_time": "8:40:52", "remaining_time": "5:58:45"} +{"current_steps": 2398, "total_steps": 4048, "loss": 0.26797783374786377, "lr": 7.799833146994165e-06, "epoch": 1.1848511058939826, "percentage": 59.24, "elapsed_time": "8:41:05", "remaining_time": "5:58:32"} +{"current_steps": 2399, "total_steps": 4048, "loss": 0.2665610611438751, "lr": 7.791863501503694e-06, "epoch": 1.1853453601878166, "percentage": 59.26, "elapsed_time": "8:41:18", "remaining_time": "5:58:20"} +{"current_steps": 2400, "total_steps": 4048, "loss": 0.24712792038917542, "lr": 7.783895330133596e-06, "epoch": 1.1858396144816508, "percentage": 59.29, "elapsed_time": "8:41:31", "remaining_time": "5:58:07"} +{"current_steps": 2401, "total_steps": 4048, "loss": 0.24131645262241364, "lr": 7.775928638203316e-06, "epoch": 1.186333868775485, "percentage": 59.31, "elapsed_time": "8:41:51", "remaining_time": "5:57:58"} +{"current_steps": 2402, "total_steps": 4048, "loss": 0.24233923852443695, "lr": 7.7679634310313e-06, "epoch": 1.1868281230693192, "percentage": 59.34, "elapsed_time": "8:42:03", "remaining_time": "5:57:45"} +{"current_steps": 2403, "total_steps": 4048, "loss": 0.24929150938987732, "lr": 7.759999713935002e-06, "epoch": 1.1873223773631534, "percentage": 59.36, "elapsed_time": "8:42:17", "remaining_time": "5:57:32"} +{"current_steps": 2404, "total_steps": 4048, "loss": 0.266767293214798, "lr": 7.752037492230887e-06, "epoch": 1.1878166316569876, 
"percentage": 59.39, "elapsed_time": "8:42:29", "remaining_time": "5:57:18"} +{"current_steps": 2405, "total_steps": 4048, "loss": 0.257263720035553, "lr": 7.744076771234427e-06, "epoch": 1.1883108859508218, "percentage": 59.41, "elapsed_time": "8:42:43", "remaining_time": "5:57:06"} +{"current_steps": 2406, "total_steps": 4048, "loss": 0.26949891448020935, "lr": 7.73611755626008e-06, "epoch": 1.1888051402446558, "percentage": 59.44, "elapsed_time": "8:42:55", "remaining_time": "5:56:52"} +{"current_steps": 2407, "total_steps": 4048, "loss": 0.250274121761322, "lr": 7.728159852621308e-06, "epoch": 1.18929939453849, "percentage": 59.46, "elapsed_time": "8:43:08", "remaining_time": "5:56:39"} +{"current_steps": 2408, "total_steps": 4048, "loss": 0.2442864030599594, "lr": 7.720203665630553e-06, "epoch": 1.1897936488323242, "percentage": 59.49, "elapsed_time": "8:43:21", "remaining_time": "5:56:26"} +{"current_steps": 2409, "total_steps": 4048, "loss": 0.273416131734848, "lr": 7.71224900059926e-06, "epoch": 1.1902879031261584, "percentage": 59.51, "elapsed_time": "8:43:34", "remaining_time": "5:56:13"} +{"current_steps": 2410, "total_steps": 4048, "loss": 0.2559645175933838, "lr": 7.704295862837845e-06, "epoch": 1.1907821574199926, "percentage": 59.54, "elapsed_time": "8:43:47", "remaining_time": "5:56:00"} +{"current_steps": 2411, "total_steps": 4048, "loss": 0.2793371379375458, "lr": 7.696344257655713e-06, "epoch": 1.1912764117138268, "percentage": 59.56, "elapsed_time": "8:44:01", "remaining_time": "5:55:47"} +{"current_steps": 2412, "total_steps": 4048, "loss": 0.23739437758922577, "lr": 7.688394190361235e-06, "epoch": 1.191770666007661, "percentage": 59.58, "elapsed_time": "8:44:13", "remaining_time": "5:55:34"} +{"current_steps": 2413, "total_steps": 4048, "loss": 0.27027466893196106, "lr": 7.680445666261766e-06, "epoch": 1.1922649203014952, "percentage": 59.61, "elapsed_time": "8:44:27", "remaining_time": "5:55:21"} +{"current_steps": 2414, "total_steps": 4048, 
"loss": 0.2641778886318207, "lr": 7.672498690663632e-06, "epoch": 1.1927591745953292, "percentage": 59.63, "elapsed_time": "8:44:39", "remaining_time": "5:55:08"} +{"current_steps": 2415, "total_steps": 4048, "loss": 0.25086820125579834, "lr": 7.664553268872116e-06, "epoch": 1.1932534288891634, "percentage": 59.66, "elapsed_time": "8:44:52", "remaining_time": "5:54:55"} +{"current_steps": 2416, "total_steps": 4048, "loss": 0.2871254086494446, "lr": 7.656609406191467e-06, "epoch": 1.1937476831829976, "percentage": 59.68, "elapsed_time": "8:45:06", "remaining_time": "5:54:42"} +{"current_steps": 2417, "total_steps": 4048, "loss": 0.2657528221607208, "lr": 7.648667107924893e-06, "epoch": 1.1942419374768318, "percentage": 59.71, "elapsed_time": "8:45:18", "remaining_time": "5:54:28"} +{"current_steps": 2418, "total_steps": 4048, "loss": 0.26942694187164307, "lr": 7.640726379374564e-06, "epoch": 1.194736191770666, "percentage": 59.73, "elapsed_time": "8:45:32", "remaining_time": "5:54:16"} +{"current_steps": 2419, "total_steps": 4048, "loss": 0.23883840441703796, "lr": 7.632787225841593e-06, "epoch": 1.1952304460645002, "percentage": 59.76, "elapsed_time": "8:45:44", "remaining_time": "5:54:02"} +{"current_steps": 2420, "total_steps": 4048, "loss": 0.24837304651737213, "lr": 7.624849652626049e-06, "epoch": 1.1957247003583344, "percentage": 59.78, "elapsed_time": "8:45:58", "remaining_time": "5:53:50"} +{"current_steps": 2421, "total_steps": 4048, "loss": 0.2882450222969055, "lr": 7.616913665026936e-06, "epoch": 1.1962189546521684, "percentage": 59.81, "elapsed_time": "8:46:11", "remaining_time": "5:53:36"} +{"current_steps": 2422, "total_steps": 4048, "loss": 0.25877460837364197, "lr": 7.608979268342213e-06, "epoch": 1.1967132089460026, "percentage": 59.83, "elapsed_time": "8:46:24", "remaining_time": "5:53:24"} +{"current_steps": 2423, "total_steps": 4048, "loss": 0.26970750093460083, "lr": 7.601046467868767e-06, "epoch": 1.1972074632398368, "percentage": 59.86, 
"elapsed_time": "8:46:37", "remaining_time": "5:53:10"} +{"current_steps": 2424, "total_steps": 4048, "loss": 0.23771706223487854, "lr": 7.593115268902423e-06, "epoch": 1.197701717533671, "percentage": 59.88, "elapsed_time": "8:46:50", "remaining_time": "5:52:58"} +{"current_steps": 2425, "total_steps": 4048, "loss": 0.25420787930488586, "lr": 7.585185676737932e-06, "epoch": 1.1981959718275053, "percentage": 59.91, "elapsed_time": "8:47:03", "remaining_time": "5:52:44"} +{"current_steps": 2426, "total_steps": 4048, "loss": 0.2551025152206421, "lr": 7.577257696668982e-06, "epoch": 1.1986902261213395, "percentage": 59.93, "elapsed_time": "8:47:16", "remaining_time": "5:52:32"} +{"current_steps": 2427, "total_steps": 4048, "loss": 0.2302972972393036, "lr": 7.569331333988177e-06, "epoch": 1.1991844804151737, "percentage": 59.96, "elapsed_time": "8:47:29", "remaining_time": "5:52:18"} +{"current_steps": 2428, "total_steps": 4048, "loss": 0.25811445713043213, "lr": 7.561406593987045e-06, "epoch": 1.1996787347090079, "percentage": 59.98, "elapsed_time": "8:47:42", "remaining_time": "5:52:05"} +{"current_steps": 2429, "total_steps": 4048, "loss": 0.2550782561302185, "lr": 7.5534834819560235e-06, "epoch": 1.200172989002842, "percentage": 60.0, "elapsed_time": "8:47:55", "remaining_time": "5:51:52"} +{"current_steps": 2430, "total_steps": 4048, "loss": 0.24825535714626312, "lr": 7.545562003184474e-06, "epoch": 1.200667243296676, "percentage": 60.03, "elapsed_time": "8:48:08", "remaining_time": "5:51:39"} +{"current_steps": 2431, "total_steps": 4048, "loss": 0.29703712463378906, "lr": 7.537642162960664e-06, "epoch": 1.2011614975905103, "percentage": 60.05, "elapsed_time": "8:48:22", "remaining_time": "5:51:27"} +{"current_steps": 2432, "total_steps": 4048, "loss": 0.26830747723579407, "lr": 7.5297239665717625e-06, "epoch": 1.2016557518843445, "percentage": 60.08, "elapsed_time": "8:48:35", "remaining_time": "5:51:14"} +{"current_steps": 2433, "total_steps": 4048, "loss": 
0.2428341656923294, "lr": 7.521807419303846e-06, "epoch": 1.2021500061781787, "percentage": 60.1, "elapsed_time": "8:48:48", "remaining_time": "5:51:01"} +{"current_steps": 2434, "total_steps": 4048, "loss": 0.2843051552772522, "lr": 7.513892526441883e-06, "epoch": 1.202644260472013, "percentage": 60.13, "elapsed_time": "8:49:01", "remaining_time": "5:50:48"} +{"current_steps": 2435, "total_steps": 4048, "loss": 0.2485228031873703, "lr": 7.50597929326975e-06, "epoch": 1.203138514765847, "percentage": 60.15, "elapsed_time": "8:49:15", "remaining_time": "5:50:35"} +{"current_steps": 2436, "total_steps": 4048, "loss": 0.25343626737594604, "lr": 7.498067725070206e-06, "epoch": 1.203632769059681, "percentage": 60.18, "elapsed_time": "8:49:28", "remaining_time": "5:50:22"} +{"current_steps": 2437, "total_steps": 4048, "loss": 0.24906575679779053, "lr": 7.490157827124902e-06, "epoch": 1.2041270233535153, "percentage": 60.2, "elapsed_time": "8:49:42", "remaining_time": "5:50:09"} +{"current_steps": 2438, "total_steps": 4048, "loss": 0.33576443791389465, "lr": 7.4822496047143665e-06, "epoch": 1.2046212776473495, "percentage": 60.23, "elapsed_time": "8:49:55", "remaining_time": "5:49:56"} +{"current_steps": 2439, "total_steps": 4048, "loss": 0.2755683362483978, "lr": 7.474343063118023e-06, "epoch": 1.2051155319411837, "percentage": 60.25, "elapsed_time": "8:50:08", "remaining_time": "5:49:43"} +{"current_steps": 2440, "total_steps": 4048, "loss": 0.2667745351791382, "lr": 7.466438207614165e-06, "epoch": 1.205609786235018, "percentage": 60.28, "elapsed_time": "8:50:21", "remaining_time": "5:49:30"} +{"current_steps": 2441, "total_steps": 4048, "loss": 0.2970271408557892, "lr": 7.458535043479959e-06, "epoch": 1.2061040405288521, "percentage": 60.3, "elapsed_time": "8:50:33", "remaining_time": "5:49:17"} +{"current_steps": 2442, "total_steps": 4048, "loss": 0.2628048360347748, "lr": 7.450633575991442e-06, "epoch": 1.2065982948226863, "percentage": 60.33, "elapsed_time": 
"8:50:47", "remaining_time": "5:49:04"} +{"current_steps": 2443, "total_steps": 4048, "loss": 0.29923003911972046, "lr": 7.442733810423526e-06, "epoch": 1.2070925491165205, "percentage": 60.35, "elapsed_time": "8:51:00", "remaining_time": "5:48:51"} +{"current_steps": 2444, "total_steps": 4048, "loss": 0.2486419975757599, "lr": 7.4348357520499805e-06, "epoch": 1.2075868034103547, "percentage": 60.38, "elapsed_time": "8:51:13", "remaining_time": "5:48:38"} +{"current_steps": 2445, "total_steps": 4048, "loss": 0.2711118459701538, "lr": 7.4269394061434315e-06, "epoch": 1.2080810577041887, "percentage": 60.4, "elapsed_time": "8:51:26", "remaining_time": "5:48:25"} +{"current_steps": 2446, "total_steps": 4048, "loss": 0.2568815052509308, "lr": 7.419044777975371e-06, "epoch": 1.208575311998023, "percentage": 60.42, "elapsed_time": "8:51:39", "remaining_time": "5:48:12"} +{"current_steps": 2447, "total_steps": 4048, "loss": 0.2546462416648865, "lr": 7.411151872816143e-06, "epoch": 1.2090695662918571, "percentage": 60.45, "elapsed_time": "8:51:52", "remaining_time": "5:47:59"} +{"current_steps": 2448, "total_steps": 4048, "loss": 0.23455393314361572, "lr": 7.403260695934933e-06, "epoch": 1.2095638205856913, "percentage": 60.47, "elapsed_time": "8:52:05", "remaining_time": "5:47:46"} +{"current_steps": 2449, "total_steps": 4048, "loss": 0.2874235510826111, "lr": 7.395371252599779e-06, "epoch": 1.2100580748795255, "percentage": 60.5, "elapsed_time": "8:52:18", "remaining_time": "5:47:33"} +{"current_steps": 2450, "total_steps": 4048, "loss": 0.2462289184331894, "lr": 7.387483548077559e-06, "epoch": 1.2105523291733598, "percentage": 60.52, "elapsed_time": "8:52:31", "remaining_time": "5:47:20"} +{"current_steps": 2451, "total_steps": 4048, "loss": 0.29385364055633545, "lr": 7.379597587633998e-06, "epoch": 1.211046583467194, "percentage": 60.55, "elapsed_time": "8:52:44", "remaining_time": "5:47:07"} +{"current_steps": 2452, "total_steps": 4048, "loss": 0.25049760937690735, 
"lr": 7.371713376533642e-06, "epoch": 1.211540837761028, "percentage": 60.57, "elapsed_time": "8:52:57", "remaining_time": "5:46:54"} +{"current_steps": 2453, "total_steps": 4048, "loss": 0.2748974859714508, "lr": 7.363830920039887e-06, "epoch": 1.2120350920548622, "percentage": 60.6, "elapsed_time": "8:53:11", "remaining_time": "5:46:41"} +{"current_steps": 2454, "total_steps": 4048, "loss": 0.2707570791244507, "lr": 7.355950223414939e-06, "epoch": 1.2125293463486964, "percentage": 60.62, "elapsed_time": "8:53:24", "remaining_time": "5:46:28"} +{"current_steps": 2455, "total_steps": 4048, "loss": 0.2864024043083191, "lr": 7.3480712919198474e-06, "epoch": 1.2130236006425306, "percentage": 60.65, "elapsed_time": "8:53:37", "remaining_time": "5:46:15"} +{"current_steps": 2456, "total_steps": 4048, "loss": 0.3181900680065155, "lr": 7.340194130814466e-06, "epoch": 1.2135178549363648, "percentage": 60.67, "elapsed_time": "8:53:50", "remaining_time": "5:46:02"} +{"current_steps": 2457, "total_steps": 4048, "loss": 0.3022974729537964, "lr": 7.332318745357483e-06, "epoch": 1.214012109230199, "percentage": 60.7, "elapsed_time": "8:54:03", "remaining_time": "5:45:49"} +{"current_steps": 2458, "total_steps": 4048, "loss": 0.2850461006164551, "lr": 7.324445140806387e-06, "epoch": 1.2145063635240332, "percentage": 60.72, "elapsed_time": "8:54:16", "remaining_time": "5:45:36"} +{"current_steps": 2459, "total_steps": 4048, "loss": 0.21958643198013306, "lr": 7.316573322417483e-06, "epoch": 1.2150006178178674, "percentage": 60.75, "elapsed_time": "8:54:29", "remaining_time": "5:45:23"} +{"current_steps": 2460, "total_steps": 4048, "loss": 0.2517468333244324, "lr": 7.3087032954458915e-06, "epoch": 1.2154948721117014, "percentage": 60.77, "elapsed_time": "8:54:42", "remaining_time": "5:45:09"} +{"current_steps": 2461, "total_steps": 4048, "loss": 0.26957637071609497, "lr": 7.300835065145526e-06, "epoch": 1.2159891264055356, "percentage": 60.8, "elapsed_time": "8:54:55", 
"remaining_time": "5:44:57"} +{"current_steps": 2462, "total_steps": 4048, "loss": 0.2699058949947357, "lr": 7.292968636769103e-06, "epoch": 1.2164833806993698, "percentage": 60.82, "elapsed_time": "8:55:08", "remaining_time": "5:44:43"} +{"current_steps": 2463, "total_steps": 4048, "loss": 0.25076431035995483, "lr": 7.285104015568138e-06, "epoch": 1.216977634993204, "percentage": 60.84, "elapsed_time": "8:55:21", "remaining_time": "5:44:30"} +{"current_steps": 2464, "total_steps": 4048, "loss": 0.24862724542617798, "lr": 7.277241206792944e-06, "epoch": 1.2174718892870382, "percentage": 60.87, "elapsed_time": "8:55:33", "remaining_time": "5:44:17"} +{"current_steps": 2465, "total_steps": 4048, "loss": 0.27427712082862854, "lr": 7.269380215692614e-06, "epoch": 1.2179661435808724, "percentage": 60.89, "elapsed_time": "8:55:47", "remaining_time": "5:44:04"} +{"current_steps": 2466, "total_steps": 4048, "loss": 0.24343061447143555, "lr": 7.261521047515041e-06, "epoch": 1.2184603978747066, "percentage": 60.92, "elapsed_time": "8:56:00", "remaining_time": "5:43:51"} +{"current_steps": 2467, "total_steps": 4048, "loss": 0.25482866168022156, "lr": 7.253663707506882e-06, "epoch": 1.2189546521685406, "percentage": 60.94, "elapsed_time": "8:56:12", "remaining_time": "5:43:38"} +{"current_steps": 2468, "total_steps": 4048, "loss": 0.27699458599090576, "lr": 7.2458082009135964e-06, "epoch": 1.2194489064623748, "percentage": 60.97, "elapsed_time": "8:56:26", "remaining_time": "5:43:25"} +{"current_steps": 2469, "total_steps": 4048, "loss": 0.26576149463653564, "lr": 7.237954532979401e-06, "epoch": 1.219943160756209, "percentage": 60.99, "elapsed_time": "8:56:38", "remaining_time": "5:43:12"} +{"current_steps": 2470, "total_steps": 4048, "loss": 0.287861168384552, "lr": 7.230102708947298e-06, "epoch": 1.2204374150500432, "percentage": 61.02, "elapsed_time": "8:56:52", "remaining_time": "5:42:59"} +{"current_steps": 2471, "total_steps": 4048, "loss": 0.25484874844551086, "lr": 
7.2222527340590434e-06, "epoch": 1.2209316693438774, "percentage": 61.04, "elapsed_time": "8:57:05", "remaining_time": "5:42:46"} +{"current_steps": 2472, "total_steps": 4048, "loss": 0.26371529698371887, "lr": 7.214404613555177e-06, "epoch": 1.2214259236377116, "percentage": 61.07, "elapsed_time": "8:57:18", "remaining_time": "5:42:33"} +{"current_steps": 2473, "total_steps": 4048, "loss": 0.23692578077316284, "lr": 7.206558352674992e-06, "epoch": 1.2219201779315458, "percentage": 61.09, "elapsed_time": "8:57:31", "remaining_time": "5:42:20"} +{"current_steps": 2474, "total_steps": 4048, "loss": 0.26369085907936096, "lr": 7.198713956656538e-06, "epoch": 1.22241443222538, "percentage": 61.12, "elapsed_time": "8:57:44", "remaining_time": "5:42:07"} +{"current_steps": 2475, "total_steps": 4048, "loss": 0.260580450296402, "lr": 7.1908714307366145e-06, "epoch": 1.2229086865192142, "percentage": 61.14, "elapsed_time": "8:57:57", "remaining_time": "5:41:54"} +{"current_steps": 2476, "total_steps": 4048, "loss": 0.2693007290363312, "lr": 7.1830307801507904e-06, "epoch": 1.2234029408130482, "percentage": 61.17, "elapsed_time": "8:58:10", "remaining_time": "5:41:41"} +{"current_steps": 2477, "total_steps": 4048, "loss": 0.26629775762557983, "lr": 7.1751920101333695e-06, "epoch": 1.2238971951068824, "percentage": 61.19, "elapsed_time": "8:58:23", "remaining_time": "5:41:28"} +{"current_steps": 2478, "total_steps": 4048, "loss": 0.2963234782218933, "lr": 7.167355125917399e-06, "epoch": 1.2243914494007166, "percentage": 61.22, "elapsed_time": "8:58:37", "remaining_time": "5:41:15"} +{"current_steps": 2479, "total_steps": 4048, "loss": 0.24415187537670135, "lr": 7.159520132734669e-06, "epoch": 1.2248857036945509, "percentage": 61.24, "elapsed_time": "8:58:50", "remaining_time": "5:41:02"} +{"current_steps": 2480, "total_steps": 4048, "loss": 0.2941599190235138, "lr": 7.15168703581572e-06, "epoch": 1.225379957988385, "percentage": 61.26, "elapsed_time": "8:59:02", 
"remaining_time": "5:40:48"} +{"current_steps": 2481, "total_steps": 4048, "loss": 0.22807514667510986, "lr": 7.1438558403898065e-06, "epoch": 1.2258742122822193, "percentage": 61.29, "elapsed_time": "8:59:15", "remaining_time": "5:40:36"} +{"current_steps": 2482, "total_steps": 4048, "loss": 0.28865426778793335, "lr": 7.136026551684923e-06, "epoch": 1.2263684665760535, "percentage": 61.31, "elapsed_time": "8:59:28", "remaining_time": "5:40:22"} +{"current_steps": 2483, "total_steps": 4048, "loss": 0.3015780448913574, "lr": 7.1281991749277945e-06, "epoch": 1.2268627208698875, "percentage": 61.34, "elapsed_time": "8:59:41", "remaining_time": "5:40:09"} +{"current_steps": 2484, "total_steps": 4048, "loss": 0.2521517872810364, "lr": 7.12037371534386e-06, "epoch": 1.2273569751637217, "percentage": 61.36, "elapsed_time": "8:59:54", "remaining_time": "5:39:56"} +{"current_steps": 2485, "total_steps": 4048, "loss": 0.2904277443885803, "lr": 7.1125501781572896e-06, "epoch": 1.2278512294575559, "percentage": 61.39, "elapsed_time": "9:00:07", "remaining_time": "5:39:43"} +{"current_steps": 2486, "total_steps": 4048, "loss": 0.26172375679016113, "lr": 7.104728568590966e-06, "epoch": 1.22834548375139, "percentage": 61.41, "elapsed_time": "9:00:20", "remaining_time": "5:39:30"} +{"current_steps": 2487, "total_steps": 4048, "loss": 0.23565448820590973, "lr": 7.096908891866483e-06, "epoch": 1.2288397380452243, "percentage": 61.44, "elapsed_time": "9:00:33", "remaining_time": "5:39:17"} +{"current_steps": 2488, "total_steps": 4048, "loss": 0.2550106644630432, "lr": 7.0890911532041375e-06, "epoch": 1.2293339923390585, "percentage": 61.46, "elapsed_time": "9:00:46", "remaining_time": "5:39:04"} +{"current_steps": 2489, "total_steps": 4048, "loss": 0.3221823573112488, "lr": 7.08127535782295e-06, "epoch": 1.2298282466328927, "percentage": 61.49, "elapsed_time": "9:01:00", "remaining_time": "5:38:51"} +{"current_steps": 2490, "total_steps": 4048, "loss": 0.26209163665771484, "lr": 
7.073461510940631e-06, "epoch": 1.230322500926727, "percentage": 61.51, "elapsed_time": "9:01:12", "remaining_time": "5:38:38"} +{"current_steps": 2491, "total_steps": 4048, "loss": 0.28635868430137634, "lr": 7.06564961777359e-06, "epoch": 1.2308167552205609, "percentage": 61.54, "elapsed_time": "9:01:26", "remaining_time": "5:38:25"} +{"current_steps": 2492, "total_steps": 4048, "loss": 0.25630202889442444, "lr": 7.0578396835369355e-06, "epoch": 1.231311009514395, "percentage": 61.56, "elapsed_time": "9:01:38", "remaining_time": "5:38:12"} +{"current_steps": 2493, "total_steps": 4048, "loss": 0.27345454692840576, "lr": 7.050031713444474e-06, "epoch": 1.2318052638082293, "percentage": 61.59, "elapsed_time": "9:01:52", "remaining_time": "5:37:59"} +{"current_steps": 2494, "total_steps": 4048, "loss": 0.2365841269493103, "lr": 7.042225712708692e-06, "epoch": 1.2322995181020635, "percentage": 61.61, "elapsed_time": "9:02:04", "remaining_time": "5:37:46"} +{"current_steps": 2495, "total_steps": 4048, "loss": 0.2891104221343994, "lr": 7.03442168654076e-06, "epoch": 1.2327937723958977, "percentage": 61.64, "elapsed_time": "9:02:18", "remaining_time": "5:37:33"} +{"current_steps": 2496, "total_steps": 4048, "loss": 0.2713435888290405, "lr": 7.026619640150534e-06, "epoch": 1.233288026689732, "percentage": 61.66, "elapsed_time": "9:02:30", "remaining_time": "5:37:19"} +{"current_steps": 2497, "total_steps": 4048, "loss": 0.28552842140197754, "lr": 7.018819578746557e-06, "epoch": 1.2337822809835661, "percentage": 61.68, "elapsed_time": "9:02:43", "remaining_time": "5:37:06"} +{"current_steps": 2498, "total_steps": 4048, "loss": 0.2731080949306488, "lr": 7.011021507536031e-06, "epoch": 1.2342765352774, "percentage": 61.71, "elapsed_time": "9:02:56", "remaining_time": "5:36:53"} +{"current_steps": 2499, "total_steps": 4048, "loss": 0.27373206615448, "lr": 7.003225431724841e-06, "epoch": 1.2347707895712343, "percentage": 61.73, "elapsed_time": "9:03:09", "remaining_time": 
"5:36:40"} +{"current_steps": 2500, "total_steps": 4048, "loss": 0.24507245421409607, "lr": 6.99543135651753e-06, "epoch": 1.2352650438650685, "percentage": 61.76, "elapsed_time": "9:03:22", "remaining_time": "5:36:27"} +{"current_steps": 2501, "total_steps": 4048, "loss": 0.2653801739215851, "lr": 6.9876392871173205e-06, "epoch": 1.2357592981589027, "percentage": 61.78, "elapsed_time": "9:03:41", "remaining_time": "5:36:18"} +{"current_steps": 2502, "total_steps": 4048, "loss": 0.1929643303155899, "lr": 6.979849228726079e-06, "epoch": 1.236253552452737, "percentage": 61.81, "elapsed_time": "9:03:54", "remaining_time": "5:36:05"} +{"current_steps": 2503, "total_steps": 4048, "loss": 0.2684918940067291, "lr": 6.972061186544341e-06, "epoch": 1.2367478067465711, "percentage": 61.83, "elapsed_time": "9:04:07", "remaining_time": "5:35:51"} +{"current_steps": 2504, "total_steps": 4048, "loss": 0.23158729076385498, "lr": 6.964275165771288e-06, "epoch": 1.2372420610404053, "percentage": 61.86, "elapsed_time": "9:04:20", "remaining_time": "5:35:38"} +{"current_steps": 2505, "total_steps": 4048, "loss": 0.24757611751556396, "lr": 6.95649117160476e-06, "epoch": 1.2377363153342396, "percentage": 61.88, "elapsed_time": "9:04:33", "remaining_time": "5:35:25"} +{"current_steps": 2506, "total_steps": 4048, "loss": 0.2651844620704651, "lr": 6.9487092092412425e-06, "epoch": 1.2382305696280738, "percentage": 61.91, "elapsed_time": "9:04:46", "remaining_time": "5:35:12"} +{"current_steps": 2507, "total_steps": 4048, "loss": 0.26745620369911194, "lr": 6.940929283875859e-06, "epoch": 1.2387248239219077, "percentage": 61.93, "elapsed_time": "9:04:59", "remaining_time": "5:34:59"} +{"current_steps": 2508, "total_steps": 4048, "loss": 0.22088846564292908, "lr": 6.933151400702374e-06, "epoch": 1.239219078215742, "percentage": 61.96, "elapsed_time": "9:05:12", "remaining_time": "5:34:46"} +{"current_steps": 2509, "total_steps": 4048, "loss": 0.2662886381149292, "lr": 6.925375564913193e-06, 
"epoch": 1.2397133325095762, "percentage": 61.98, "elapsed_time": "9:05:25", "remaining_time": "5:34:33"} +{"current_steps": 2510, "total_steps": 4048, "loss": 0.2691834270954132, "lr": 6.917601781699357e-06, "epoch": 1.2402075868034104, "percentage": 62.01, "elapsed_time": "9:05:37", "remaining_time": "5:34:20"} +{"current_steps": 2511, "total_steps": 4048, "loss": 0.2110689878463745, "lr": 6.909830056250527e-06, "epoch": 1.2407018410972446, "percentage": 62.03, "elapsed_time": "9:05:51", "remaining_time": "5:34:07"} +{"current_steps": 2512, "total_steps": 4048, "loss": 0.29281991720199585, "lr": 6.902060393755001e-06, "epoch": 1.2411960953910788, "percentage": 62.06, "elapsed_time": "9:06:03", "remaining_time": "5:33:53"} +{"current_steps": 2513, "total_steps": 4048, "loss": 0.27409040927886963, "lr": 6.894292799399688e-06, "epoch": 1.2416903496849128, "percentage": 62.08, "elapsed_time": "9:06:16", "remaining_time": "5:33:40"} +{"current_steps": 2514, "total_steps": 4048, "loss": 0.29440224170684814, "lr": 6.886527278370131e-06, "epoch": 1.242184603978747, "percentage": 62.1, "elapsed_time": "9:06:29", "remaining_time": "5:33:27"} +{"current_steps": 2515, "total_steps": 4048, "loss": 0.23107948899269104, "lr": 6.878763835850475e-06, "epoch": 1.2426788582725812, "percentage": 62.13, "elapsed_time": "9:06:42", "remaining_time": "5:33:14"} +{"current_steps": 2516, "total_steps": 4048, "loss": 0.2682652473449707, "lr": 6.871002477023488e-06, "epoch": 1.2431731125664154, "percentage": 62.15, "elapsed_time": "9:06:55", "remaining_time": "5:33:01"} +{"current_steps": 2517, "total_steps": 4048, "loss": 0.2935982644557953, "lr": 6.863243207070534e-06, "epoch": 1.2436673668602496, "percentage": 62.18, "elapsed_time": "9:07:08", "remaining_time": "5:32:48"} +{"current_steps": 2518, "total_steps": 4048, "loss": 0.29027625918388367, "lr": 6.855486031171597e-06, "epoch": 1.2441616211540838, "percentage": 62.2, "elapsed_time": "9:07:21", "remaining_time": "5:32:35"} 
+{"current_steps": 2519, "total_steps": 4048, "loss": 0.25107353925704956, "lr": 6.84773095450526e-06, "epoch": 1.244655875447918, "percentage": 62.23, "elapsed_time": "9:07:34", "remaining_time": "5:32:22"} +{"current_steps": 2520, "total_steps": 4048, "loss": 0.279231995344162, "lr": 6.839977982248697e-06, "epoch": 1.2451501297417522, "percentage": 62.25, "elapsed_time": "9:07:47", "remaining_time": "5:32:08"} +{"current_steps": 2521, "total_steps": 4048, "loss": 0.2544802129268646, "lr": 6.832227119577677e-06, "epoch": 1.2456443840355864, "percentage": 62.28, "elapsed_time": "9:08:00", "remaining_time": "5:31:55"} +{"current_steps": 2522, "total_steps": 4048, "loss": 0.24365633726119995, "lr": 6.824478371666573e-06, "epoch": 1.2461386383294204, "percentage": 62.3, "elapsed_time": "9:08:12", "remaining_time": "5:31:42"} +{"current_steps": 2523, "total_steps": 4048, "loss": 0.2673290967941284, "lr": 6.816731743688336e-06, "epoch": 1.2466328926232546, "percentage": 62.33, "elapsed_time": "9:08:26", "remaining_time": "5:31:29"} +{"current_steps": 2524, "total_steps": 4048, "loss": 0.23896455764770508, "lr": 6.808987240814504e-06, "epoch": 1.2471271469170888, "percentage": 62.35, "elapsed_time": "9:08:38", "remaining_time": "5:31:16"} +{"current_steps": 2525, "total_steps": 4048, "loss": 0.23196406662464142, "lr": 6.801244868215192e-06, "epoch": 1.247621401210923, "percentage": 62.38, "elapsed_time": "9:08:51", "remaining_time": "5:31:03"} +{"current_steps": 2526, "total_steps": 4048, "loss": 0.24249708652496338, "lr": 6.793504631059106e-06, "epoch": 1.2481156555047572, "percentage": 62.4, "elapsed_time": "9:09:04", "remaining_time": "5:30:50"} +{"current_steps": 2527, "total_steps": 4048, "loss": 0.2366780787706375, "lr": 6.785766534513514e-06, "epoch": 1.2486099097985914, "percentage": 62.43, "elapsed_time": "9:09:17", "remaining_time": "5:30:37"} +{"current_steps": 2528, "total_steps": 4048, "loss": 0.2615105211734772, "lr": 6.778030583744254e-06, "epoch": 
1.2491041640924256, "percentage": 62.45, "elapsed_time": "9:09:30", "remaining_time": "5:30:23"} +{"current_steps": 2529, "total_steps": 4048, "loss": 0.29761314392089844, "lr": 6.770296783915738e-06, "epoch": 1.2495984183862596, "percentage": 62.48, "elapsed_time": "9:09:42", "remaining_time": "5:30:10"} +{"current_steps": 2530, "total_steps": 4048, "loss": 0.25020867586135864, "lr": 6.762565140190948e-06, "epoch": 1.2500926726800938, "percentage": 62.5, "elapsed_time": "9:09:55", "remaining_time": "5:29:57"} +{"current_steps": 2531, "total_steps": 4048, "loss": 0.2716590166091919, "lr": 6.754835657731409e-06, "epoch": 1.250586926973928, "percentage": 62.52, "elapsed_time": "9:10:08", "remaining_time": "5:29:44"} +{"current_steps": 2532, "total_steps": 4048, "loss": 0.27042001485824585, "lr": 6.747108341697221e-06, "epoch": 1.2510811812677622, "percentage": 62.55, "elapsed_time": "9:10:21", "remaining_time": "5:29:31"} +{"current_steps": 2533, "total_steps": 4048, "loss": 0.2659035325050354, "lr": 6.739383197247023e-06, "epoch": 1.2515754355615964, "percentage": 62.57, "elapsed_time": "9:10:34", "remaining_time": "5:29:17"} +{"current_steps": 2534, "total_steps": 4048, "loss": 0.2803581655025482, "lr": 6.731660229538014e-06, "epoch": 1.2520696898554307, "percentage": 62.6, "elapsed_time": "9:10:47", "remaining_time": "5:29:04"} +{"current_steps": 2535, "total_steps": 4048, "loss": 0.24422097206115723, "lr": 6.723939443725938e-06, "epoch": 1.2525639441492649, "percentage": 62.62, "elapsed_time": "9:10:59", "remaining_time": "5:28:51"} +{"current_steps": 2536, "total_steps": 4048, "loss": 0.30003631114959717, "lr": 6.71622084496508e-06, "epoch": 1.253058198443099, "percentage": 62.65, "elapsed_time": "9:11:13", "remaining_time": "5:28:38"} +{"current_steps": 2537, "total_steps": 4048, "loss": 0.25745317339897156, "lr": 6.708504438408265e-06, "epoch": 1.2535524527369333, "percentage": 62.67, "elapsed_time": "9:11:25", "remaining_time": "5:28:25"} +{"current_steps": 
2538, "total_steps": 4048, "loss": 0.27648618817329407, "lr": 6.700790229206856e-06, "epoch": 1.2540467070307673, "percentage": 62.7, "elapsed_time": "9:11:39", "remaining_time": "5:28:12"} +{"current_steps": 2539, "total_steps": 4048, "loss": 0.2579975724220276, "lr": 6.6930782225107536e-06, "epoch": 1.2545409613246015, "percentage": 62.72, "elapsed_time": "9:11:52", "remaining_time": "5:27:59"} +{"current_steps": 2540, "total_steps": 4048, "loss": 0.2806825637817383, "lr": 6.68536842346838e-06, "epoch": 1.2550352156184357, "percentage": 62.75, "elapsed_time": "9:12:04", "remaining_time": "5:27:46"} +{"current_steps": 2541, "total_steps": 4048, "loss": 0.2641657888889313, "lr": 6.677660837226685e-06, "epoch": 1.2555294699122699, "percentage": 62.77, "elapsed_time": "9:12:17", "remaining_time": "5:27:33"} +{"current_steps": 2542, "total_steps": 4048, "loss": 0.25483542680740356, "lr": 6.669955468931142e-06, "epoch": 1.256023724206104, "percentage": 62.8, "elapsed_time": "9:12:30", "remaining_time": "5:27:19"} +{"current_steps": 2543, "total_steps": 4048, "loss": 0.264334112405777, "lr": 6.662252323725751e-06, "epoch": 1.2565179784999383, "percentage": 62.82, "elapsed_time": "9:12:43", "remaining_time": "5:27:07"} +{"current_steps": 2544, "total_steps": 4048, "loss": 0.2541567385196686, "lr": 6.654551406753017e-06, "epoch": 1.2570122327937723, "percentage": 62.85, "elapsed_time": "9:12:56", "remaining_time": "5:26:53"} +{"current_steps": 2545, "total_steps": 4048, "loss": 0.2695424258708954, "lr": 6.646852723153965e-06, "epoch": 1.2575064870876065, "percentage": 62.87, "elapsed_time": "9:13:09", "remaining_time": "5:26:40"} +{"current_steps": 2546, "total_steps": 4048, "loss": 0.2694344222545624, "lr": 6.63915627806812e-06, "epoch": 1.2580007413814407, "percentage": 62.9, "elapsed_time": "9:13:21", "remaining_time": "5:26:27"} +{"current_steps": 2547, "total_steps": 4048, "loss": 0.2695961892604828, "lr": 6.631462076633527e-06, "epoch": 1.258494995675275, 
"percentage": 62.92, "elapsed_time": "9:13:35", "remaining_time": "5:26:14"} +{"current_steps": 2548, "total_steps": 4048, "loss": 0.26878753304481506, "lr": 6.623770123986719e-06, "epoch": 1.258989249969109, "percentage": 62.94, "elapsed_time": "9:13:47", "remaining_time": "5:26:01"} +{"current_steps": 2549, "total_steps": 4048, "loss": 0.27568501234054565, "lr": 6.616080425262738e-06, "epoch": 1.2594835042629433, "percentage": 62.97, "elapsed_time": "9:14:00", "remaining_time": "5:25:48"} +{"current_steps": 2550, "total_steps": 4048, "loss": 0.2991989254951477, "lr": 6.608392985595111e-06, "epoch": 1.2599777585567775, "percentage": 62.99, "elapsed_time": "9:14:13", "remaining_time": "5:25:34"} +{"current_steps": 2551, "total_steps": 4048, "loss": 0.21832239627838135, "lr": 6.600707810115869e-06, "epoch": 1.2604720128506117, "percentage": 63.02, "elapsed_time": "9:14:25", "remaining_time": "5:25:21"} +{"current_steps": 2552, "total_steps": 4048, "loss": 0.2671685516834259, "lr": 6.593024903955525e-06, "epoch": 1.260966267144446, "percentage": 63.04, "elapsed_time": "9:14:39", "remaining_time": "5:25:08"} +{"current_steps": 2553, "total_steps": 4048, "loss": 0.23399557173252106, "lr": 6.585344272243073e-06, "epoch": 1.26146052143828, "percentage": 63.07, "elapsed_time": "9:14:51", "remaining_time": "5:24:55"} +{"current_steps": 2554, "total_steps": 4048, "loss": 0.2701990008354187, "lr": 6.577665920105996e-06, "epoch": 1.2619547757321141, "percentage": 63.09, "elapsed_time": "9:15:04", "remaining_time": "5:24:42"} +{"current_steps": 2555, "total_steps": 4048, "loss": 0.2679189145565033, "lr": 6.56998985267025e-06, "epoch": 1.2624490300259483, "percentage": 63.12, "elapsed_time": "9:15:17", "remaining_time": "5:24:28"} +{"current_steps": 2556, "total_steps": 4048, "loss": 0.2597065567970276, "lr": 6.562316075060272e-06, "epoch": 1.2629432843197825, "percentage": 63.14, "elapsed_time": "9:15:30", "remaining_time": "5:24:15"} +{"current_steps": 2557, "total_steps": 
4048, "loss": 0.2942010462284088, "lr": 6.554644592398962e-06, "epoch": 1.2634375386136167, "percentage": 63.17, "elapsed_time": "9:15:42", "remaining_time": "5:24:02"} +{"current_steps": 2558, "total_steps": 4048, "loss": 0.2547098994255066, "lr": 6.546975409807696e-06, "epoch": 1.263931792907451, "percentage": 63.19, "elapsed_time": "9:15:56", "remaining_time": "5:23:49"} +{"current_steps": 2559, "total_steps": 4048, "loss": 0.2779114246368408, "lr": 6.539308532406306e-06, "epoch": 1.264426047201285, "percentage": 63.22, "elapsed_time": "9:16:08", "remaining_time": "5:23:36"} +{"current_steps": 2560, "total_steps": 4048, "loss": 0.22318917512893677, "lr": 6.531643965313093e-06, "epoch": 1.2649203014951191, "percentage": 63.24, "elapsed_time": "9:16:21", "remaining_time": "5:23:23"} +{"current_steps": 2561, "total_steps": 4048, "loss": 0.25439128279685974, "lr": 6.523981713644814e-06, "epoch": 1.2654145557889533, "percentage": 63.27, "elapsed_time": "9:16:34", "remaining_time": "5:23:10"} +{"current_steps": 2562, "total_steps": 4048, "loss": 0.2317974865436554, "lr": 6.516321782516677e-06, "epoch": 1.2659088100827876, "percentage": 63.29, "elapsed_time": "9:16:47", "remaining_time": "5:22:56"} +{"current_steps": 2563, "total_steps": 4048, "loss": 0.273223876953125, "lr": 6.508664177042339e-06, "epoch": 1.2664030643766218, "percentage": 63.32, "elapsed_time": "9:17:00", "remaining_time": "5:22:43"} +{"current_steps": 2564, "total_steps": 4048, "loss": 0.28408509492874146, "lr": 6.501008902333912e-06, "epoch": 1.266897318670456, "percentage": 63.34, "elapsed_time": "9:17:13", "remaining_time": "5:22:30"} +{"current_steps": 2565, "total_steps": 4048, "loss": 0.2702238857746124, "lr": 6.493355963501951e-06, "epoch": 1.2673915729642902, "percentage": 63.36, "elapsed_time": "9:17:25", "remaining_time": "5:22:17"} +{"current_steps": 2566, "total_steps": 4048, "loss": 0.2142164558172226, "lr": 6.485705365655441e-06, "epoch": 1.2678858272581244, "percentage": 63.39, 
"elapsed_time": "9:17:38", "remaining_time": "5:22:04"} +{"current_steps": 2567, "total_steps": 4048, "loss": 0.2654300928115845, "lr": 6.478057113901817e-06, "epoch": 1.2683800815519586, "percentage": 63.41, "elapsed_time": "9:17:51", "remaining_time": "5:21:51"} +{"current_steps": 2568, "total_steps": 4048, "loss": 0.24601367115974426, "lr": 6.470411213346941e-06, "epoch": 1.2688743358457928, "percentage": 63.44, "elapsed_time": "9:18:04", "remaining_time": "5:21:37"} +{"current_steps": 2569, "total_steps": 4048, "loss": 0.26201942563056946, "lr": 6.462767669095109e-06, "epoch": 1.2693685901396268, "percentage": 63.46, "elapsed_time": "9:18:17", "remaining_time": "5:21:24"} +{"current_steps": 2570, "total_steps": 4048, "loss": 0.2839587926864624, "lr": 6.455126486249038e-06, "epoch": 1.269862844433461, "percentage": 63.49, "elapsed_time": "9:18:29", "remaining_time": "5:21:11"} +{"current_steps": 2571, "total_steps": 4048, "loss": 0.21100708842277527, "lr": 6.447487669909873e-06, "epoch": 1.2703570987272952, "percentage": 63.51, "elapsed_time": "9:18:43", "remaining_time": "5:20:58"} +{"current_steps": 2572, "total_steps": 4048, "loss": 0.2181582748889923, "lr": 6.439851225177185e-06, "epoch": 1.2708513530211294, "percentage": 63.54, "elapsed_time": "9:18:56", "remaining_time": "5:20:45"} +{"current_steps": 2573, "total_steps": 4048, "loss": 0.29196488857269287, "lr": 6.432217157148948e-06, "epoch": 1.2713456073149636, "percentage": 63.56, "elapsed_time": "9:19:09", "remaining_time": "5:20:32"} +{"current_steps": 2574, "total_steps": 4048, "loss": 0.2365931123495102, "lr": 6.424585470921563e-06, "epoch": 1.2718398616087978, "percentage": 63.59, "elapsed_time": "9:19:21", "remaining_time": "5:20:19"} +{"current_steps": 2575, "total_steps": 4048, "loss": 0.2277393937110901, "lr": 6.4169561715898255e-06, "epoch": 1.2723341159026318, "percentage": 63.61, "elapsed_time": "9:19:35", "remaining_time": "5:20:06"} +{"current_steps": 2576, "total_steps": 4048, "loss": 
0.25285032391548157, "lr": 6.409329264246956e-06, "epoch": 1.272828370196466, "percentage": 63.64, "elapsed_time": "9:19:47", "remaining_time": "5:19:53"} +{"current_steps": 2577, "total_steps": 4048, "loss": 0.253650963306427, "lr": 6.401704753984563e-06, "epoch": 1.2733226244903002, "percentage": 63.66, "elapsed_time": "9:20:00", "remaining_time": "5:19:39"} +{"current_steps": 2578, "total_steps": 4048, "loss": 0.22143784165382385, "lr": 6.394082645892668e-06, "epoch": 1.2738168787841344, "percentage": 63.69, "elapsed_time": "9:20:13", "remaining_time": "5:19:26"} +{"current_steps": 2579, "total_steps": 4048, "loss": 0.27591395378112793, "lr": 6.3864629450596696e-06, "epoch": 1.2743111330779686, "percentage": 63.71, "elapsed_time": "9:20:26", "remaining_time": "5:19:13"} +{"current_steps": 2580, "total_steps": 4048, "loss": 0.32865333557128906, "lr": 6.37884565657238e-06, "epoch": 1.2748053873718028, "percentage": 63.74, "elapsed_time": "9:20:38", "remaining_time": "5:19:00"} +{"current_steps": 2581, "total_steps": 4048, "loss": 0.2743702530860901, "lr": 6.371230785515992e-06, "epoch": 1.275299641665637, "percentage": 63.76, "elapsed_time": "9:20:51", "remaining_time": "5:18:47"} +{"current_steps": 2582, "total_steps": 4048, "loss": 0.23967956006526947, "lr": 6.3636183369740845e-06, "epoch": 1.2757938959594712, "percentage": 63.78, "elapsed_time": "9:21:04", "remaining_time": "5:18:34"} +{"current_steps": 2583, "total_steps": 4048, "loss": 0.2474803626537323, "lr": 6.356008316028614e-06, "epoch": 1.2762881502533054, "percentage": 63.81, "elapsed_time": "9:21:17", "remaining_time": "5:18:20"} +{"current_steps": 2584, "total_steps": 4048, "loss": 0.2523267865180969, "lr": 6.348400727759925e-06, "epoch": 1.2767824045471394, "percentage": 63.83, "elapsed_time": "9:21:30", "remaining_time": "5:18:07"} +{"current_steps": 2585, "total_steps": 4048, "loss": 0.2549436092376709, "lr": 6.340795577246738e-06, "epoch": 1.2772766588409736, "percentage": 63.86, "elapsed_time": 
"9:21:43", "remaining_time": "5:17:54"} +{"current_steps": 2586, "total_steps": 4048, "loss": 0.2602443993091583, "lr": 6.333192869566138e-06, "epoch": 1.2777709131348078, "percentage": 63.88, "elapsed_time": "9:21:56", "remaining_time": "5:17:41"} +{"current_steps": 2587, "total_steps": 4048, "loss": 0.22912462055683136, "lr": 6.325592609793588e-06, "epoch": 1.278265167428642, "percentage": 63.91, "elapsed_time": "9:22:09", "remaining_time": "5:17:28"} +{"current_steps": 2588, "total_steps": 4048, "loss": 0.3004158139228821, "lr": 6.317994803002907e-06, "epoch": 1.2787594217224763, "percentage": 63.93, "elapsed_time": "9:22:22", "remaining_time": "5:17:15"} +{"current_steps": 2589, "total_steps": 4048, "loss": 0.25851407647132874, "lr": 6.310399454266289e-06, "epoch": 1.2792536760163105, "percentage": 63.96, "elapsed_time": "9:22:34", "remaining_time": "5:17:02"} +{"current_steps": 2590, "total_steps": 4048, "loss": 0.24637526273727417, "lr": 6.302806568654277e-06, "epoch": 1.2797479303101444, "percentage": 63.98, "elapsed_time": "9:22:47", "remaining_time": "5:16:49"} +{"current_steps": 2591, "total_steps": 4048, "loss": 0.26500213146209717, "lr": 6.295216151235774e-06, "epoch": 1.2802421846039787, "percentage": 64.01, "elapsed_time": "9:23:00", "remaining_time": "5:16:35"} +{"current_steps": 2592, "total_steps": 4048, "loss": 0.24276241660118103, "lr": 6.287628207078031e-06, "epoch": 1.2807364388978129, "percentage": 64.03, "elapsed_time": "9:23:13", "remaining_time": "5:16:22"} +{"current_steps": 2593, "total_steps": 4048, "loss": 0.27117204666137695, "lr": 6.280042741246655e-06, "epoch": 1.281230693191647, "percentage": 64.06, "elapsed_time": "9:23:26", "remaining_time": "5:16:09"} +{"current_steps": 2594, "total_steps": 4048, "loss": 0.29287856817245483, "lr": 6.272459758805596e-06, "epoch": 1.2817249474854813, "percentage": 64.08, "elapsed_time": "9:23:39", "remaining_time": "5:15:56"} +{"current_steps": 2595, "total_steps": 4048, "loss": 0.3065788149833679, 
"lr": 6.26487926481714e-06, "epoch": 1.2822192017793155, "percentage": 64.11, "elapsed_time": "9:23:51", "remaining_time": "5:15:43"} +{"current_steps": 2596, "total_steps": 4048, "loss": 0.2738455533981323, "lr": 6.257301264341915e-06, "epoch": 1.2827134560731497, "percentage": 64.13, "elapsed_time": "9:24:05", "remaining_time": "5:15:30"} +{"current_steps": 2597, "total_steps": 4048, "loss": 0.24216318130493164, "lr": 6.2497257624388915e-06, "epoch": 1.283207710366984, "percentage": 64.16, "elapsed_time": "9:24:17", "remaining_time": "5:15:17"} +{"current_steps": 2598, "total_steps": 4048, "loss": 0.276785671710968, "lr": 6.242152764165368e-06, "epoch": 1.283701964660818, "percentage": 64.18, "elapsed_time": "9:24:31", "remaining_time": "5:15:04"} +{"current_steps": 2599, "total_steps": 4048, "loss": 0.24999365210533142, "lr": 6.234582274576961e-06, "epoch": 1.2841962189546523, "percentage": 64.2, "elapsed_time": "9:24:43", "remaining_time": "5:14:50"} +{"current_steps": 2600, "total_steps": 4048, "loss": 0.27714112401008606, "lr": 6.227014298727627e-06, "epoch": 1.2846904732484863, "percentage": 64.23, "elapsed_time": "9:24:56", "remaining_time": "5:14:37"} +{"current_steps": 2601, "total_steps": 4048, "loss": 0.2422318160533905, "lr": 6.219448841669639e-06, "epoch": 1.2851847275423205, "percentage": 64.25, "elapsed_time": "9:25:15", "remaining_time": "5:14:28"} +{"current_steps": 2602, "total_steps": 4048, "loss": 0.26688697934150696, "lr": 6.21188590845359e-06, "epoch": 1.2856789818361547, "percentage": 64.28, "elapsed_time": "9:25:28", "remaining_time": "5:14:14"} +{"current_steps": 2603, "total_steps": 4048, "loss": 0.256889671087265, "lr": 6.204325504128379e-06, "epoch": 1.286173236129989, "percentage": 64.3, "elapsed_time": "9:25:41", "remaining_time": "5:14:01"} +{"current_steps": 2604, "total_steps": 4048, "loss": 0.27372461557388306, "lr": 6.196767633741225e-06, "epoch": 1.2866674904238231, "percentage": 64.33, "elapsed_time": "9:25:54", 
"remaining_time": "5:13:48"} +{"current_steps": 2605, "total_steps": 4048, "loss": 0.25194403529167175, "lr": 6.189212302337663e-06, "epoch": 1.287161744717657, "percentage": 64.35, "elapsed_time": "9:26:07", "remaining_time": "5:13:35"} +{"current_steps": 2606, "total_steps": 4048, "loss": 0.24381688237190247, "lr": 6.181659514961515e-06, "epoch": 1.2876559990114913, "percentage": 64.38, "elapsed_time": "9:26:20", "remaining_time": "5:13:22"} +{"current_steps": 2607, "total_steps": 4048, "loss": 0.255805104970932, "lr": 6.17410927665492e-06, "epoch": 1.2881502533053255, "percentage": 64.4, "elapsed_time": "9:26:33", "remaining_time": "5:13:09"} +{"current_steps": 2608, "total_steps": 4048, "loss": 0.25070682168006897, "lr": 6.166561592458307e-06, "epoch": 1.2886445075991597, "percentage": 64.43, "elapsed_time": "9:26:46", "remaining_time": "5:12:56"} +{"current_steps": 2609, "total_steps": 4048, "loss": 0.24080060422420502, "lr": 6.159016467410397e-06, "epoch": 1.289138761892994, "percentage": 64.45, "elapsed_time": "9:26:59", "remaining_time": "5:12:43"} +{"current_steps": 2610, "total_steps": 4048, "loss": 0.28041762113571167, "lr": 6.151473906548215e-06, "epoch": 1.2896330161868281, "percentage": 64.48, "elapsed_time": "9:27:12", "remaining_time": "5:12:30"} +{"current_steps": 2611, "total_steps": 4048, "loss": 0.2624273896217346, "lr": 6.143933914907065e-06, "epoch": 1.2901272704806623, "percentage": 64.5, "elapsed_time": "9:27:25", "remaining_time": "5:12:17"} +{"current_steps": 2612, "total_steps": 4048, "loss": 0.2658112049102783, "lr": 6.136396497520536e-06, "epoch": 1.2906215247744965, "percentage": 64.53, "elapsed_time": "9:27:38", "remaining_time": "5:12:04"} +{"current_steps": 2613, "total_steps": 4048, "loss": 0.27714237570762634, "lr": 6.1288616594205e-06, "epoch": 1.2911157790683307, "percentage": 64.55, "elapsed_time": "9:27:51", "remaining_time": "5:11:51"} +{"current_steps": 2614, "total_steps": 4048, "loss": 0.23253153264522552, "lr": 
6.121329405637111e-06, "epoch": 1.291610033362165, "percentage": 64.58, "elapsed_time": "9:28:04", "remaining_time": "5:11:38"} +{"current_steps": 2615, "total_steps": 4048, "loss": 0.2438409924507141, "lr": 6.1137997411987915e-06, "epoch": 1.292104287655999, "percentage": 64.6, "elapsed_time": "9:28:17", "remaining_time": "5:11:25"} +{"current_steps": 2616, "total_steps": 4048, "loss": 0.24013856053352356, "lr": 6.106272671132236e-06, "epoch": 1.2925985419498331, "percentage": 64.62, "elapsed_time": "9:28:30", "remaining_time": "5:11:12"} +{"current_steps": 2617, "total_steps": 4048, "loss": 0.2850446403026581, "lr": 6.098748200462408e-06, "epoch": 1.2930927962436674, "percentage": 64.65, "elapsed_time": "9:28:43", "remaining_time": "5:10:59"} +{"current_steps": 2618, "total_steps": 4048, "loss": 0.22195187211036682, "lr": 6.0912263342125445e-06, "epoch": 1.2935870505375016, "percentage": 64.67, "elapsed_time": "9:28:56", "remaining_time": "5:10:46"} +{"current_steps": 2619, "total_steps": 4048, "loss": 0.29266390204429626, "lr": 6.083707077404129e-06, "epoch": 1.2940813048313358, "percentage": 64.7, "elapsed_time": "9:29:09", "remaining_time": "5:10:32"} +{"current_steps": 2620, "total_steps": 4048, "loss": 0.26741352677345276, "lr": 6.076190435056913e-06, "epoch": 1.29457555912517, "percentage": 64.72, "elapsed_time": "9:29:22", "remaining_time": "5:10:19"} +{"current_steps": 2621, "total_steps": 4048, "loss": 0.26014602184295654, "lr": 6.068676412188892e-06, "epoch": 1.295069813419004, "percentage": 64.75, "elapsed_time": "9:29:35", "remaining_time": "5:10:06"} +{"current_steps": 2622, "total_steps": 4048, "loss": 0.2561393976211548, "lr": 6.061165013816333e-06, "epoch": 1.2955640677128382, "percentage": 64.77, "elapsed_time": "9:29:48", "remaining_time": "5:09:53"} +{"current_steps": 2623, "total_steps": 4048, "loss": 0.2952851951122284, "lr": 6.053656244953728e-06, "epoch": 1.2960583220066724, "percentage": 64.8, "elapsed_time": "9:30:01", "remaining_time": 
"5:09:40"} +{"current_steps": 2624, "total_steps": 4048, "loss": 0.2830423414707184, "lr": 6.046150110613831e-06, "epoch": 1.2965525763005066, "percentage": 64.82, "elapsed_time": "9:30:14", "remaining_time": "5:09:27"} +{"current_steps": 2625, "total_steps": 4048, "loss": 0.22306497395038605, "lr": 6.038646615807622e-06, "epoch": 1.2970468305943408, "percentage": 64.85, "elapsed_time": "9:30:27", "remaining_time": "5:09:14"} +{"current_steps": 2626, "total_steps": 4048, "loss": 0.23291784524917603, "lr": 6.031145765544333e-06, "epoch": 1.297541084888175, "percentage": 64.87, "elapsed_time": "9:30:41", "remaining_time": "5:09:01"} +{"current_steps": 2627, "total_steps": 4048, "loss": 0.2376563400030136, "lr": 6.023647564831425e-06, "epoch": 1.2980353391820092, "percentage": 64.9, "elapsed_time": "9:30:54", "remaining_time": "5:08:48"} +{"current_steps": 2628, "total_steps": 4048, "loss": 0.2873516380786896, "lr": 6.016152018674588e-06, "epoch": 1.2985295934758434, "percentage": 64.92, "elapsed_time": "9:31:07", "remaining_time": "5:08:35"} +{"current_steps": 2629, "total_steps": 4048, "loss": 0.2416999638080597, "lr": 6.00865913207774e-06, "epoch": 1.2990238477696776, "percentage": 64.95, "elapsed_time": "9:31:20", "remaining_time": "5:08:22"} +{"current_steps": 2630, "total_steps": 4048, "loss": 0.2627726197242737, "lr": 6.001168910043023e-06, "epoch": 1.2995181020635118, "percentage": 64.97, "elapsed_time": "9:31:32", "remaining_time": "5:08:09"} +{"current_steps": 2631, "total_steps": 4048, "loss": 0.25375279784202576, "lr": 5.993681357570809e-06, "epoch": 1.3000123563573458, "percentage": 65.0, "elapsed_time": "9:31:45", "remaining_time": "5:07:56"} +{"current_steps": 2632, "total_steps": 4048, "loss": 0.2853030562400818, "lr": 5.986196479659676e-06, "epoch": 1.30050661065118, "percentage": 65.02, "elapsed_time": "9:31:58", "remaining_time": "5:07:43"} +{"current_steps": 2633, "total_steps": 4048, "loss": 0.2626519501209259, "lr": 5.978714281306425e-06, 
"epoch": 1.3010008649450142, "percentage": 65.04, "elapsed_time": "9:32:11", "remaining_time": "5:07:30"} +{"current_steps": 2634, "total_steps": 4048, "loss": 0.2895713448524475, "lr": 5.971234767506057e-06, "epoch": 1.3014951192388484, "percentage": 65.07, "elapsed_time": "9:32:24", "remaining_time": "5:07:16"} +{"current_steps": 2635, "total_steps": 4048, "loss": 0.24617832899093628, "lr": 5.9637579432518e-06, "epoch": 1.3019893735326826, "percentage": 65.09, "elapsed_time": "9:32:37", "remaining_time": "5:07:03"} +{"current_steps": 2636, "total_steps": 4048, "loss": 0.25497785210609436, "lr": 5.956283813535066e-06, "epoch": 1.3024836278265166, "percentage": 65.12, "elapsed_time": "9:32:50", "remaining_time": "5:06:50"} +{"current_steps": 2637, "total_steps": 4048, "loss": 0.25832462310791016, "lr": 5.948812383345484e-06, "epoch": 1.3029778821203508, "percentage": 65.14, "elapsed_time": "9:33:03", "remaining_time": "5:06:37"} +{"current_steps": 2638, "total_steps": 4048, "loss": 0.24273909628391266, "lr": 5.941343657670866e-06, "epoch": 1.303472136414185, "percentage": 65.17, "elapsed_time": "9:33:15", "remaining_time": "5:06:24"} +{"current_steps": 2639, "total_steps": 4048, "loss": 0.2668009400367737, "lr": 5.933877641497232e-06, "epoch": 1.3039663907080192, "percentage": 65.19, "elapsed_time": "9:33:29", "remaining_time": "5:06:11"} +{"current_steps": 2640, "total_steps": 4048, "loss": 0.2519373595714569, "lr": 5.92641433980879e-06, "epoch": 1.3044606450018534, "percentage": 65.22, "elapsed_time": "9:33:41", "remaining_time": "5:05:58"} +{"current_steps": 2641, "total_steps": 4048, "loss": 0.30091768503189087, "lr": 5.918953757587928e-06, "epoch": 1.3049548992956876, "percentage": 65.24, "elapsed_time": "9:33:54", "remaining_time": "5:05:45"} +{"current_steps": 2642, "total_steps": 4048, "loss": 0.2504241466522217, "lr": 5.911495899815225e-06, "epoch": 1.3054491535895218, "percentage": 65.27, "elapsed_time": "9:34:07", "remaining_time": "5:05:32"} 
+{"current_steps": 2643, "total_steps": 4048, "loss": 0.24741190671920776, "lr": 5.904040771469444e-06, "epoch": 1.305943407883356, "percentage": 65.29, "elapsed_time": "9:34:20", "remaining_time": "5:05:18"} +{"current_steps": 2644, "total_steps": 4048, "loss": 0.2636350691318512, "lr": 5.896588377527519e-06, "epoch": 1.3064376621771903, "percentage": 65.32, "elapsed_time": "9:34:34", "remaining_time": "5:05:06"} +{"current_steps": 2645, "total_steps": 4048, "loss": 0.22512421011924744, "lr": 5.889138722964563e-06, "epoch": 1.3069319164710245, "percentage": 65.34, "elapsed_time": "9:34:46", "remaining_time": "5:04:52"} +{"current_steps": 2646, "total_steps": 4048, "loss": 0.26447975635528564, "lr": 5.8816918127538546e-06, "epoch": 1.3074261707648585, "percentage": 65.37, "elapsed_time": "9:34:59", "remaining_time": "5:04:39"} +{"current_steps": 2647, "total_steps": 4048, "loss": 0.22084996104240417, "lr": 5.874247651866853e-06, "epoch": 1.3079204250586927, "percentage": 65.39, "elapsed_time": "9:35:11", "remaining_time": "5:04:26"} +{"current_steps": 2648, "total_steps": 4048, "loss": 0.24033552408218384, "lr": 5.8668062452731715e-06, "epoch": 1.3084146793525269, "percentage": 65.42, "elapsed_time": "9:35:24", "remaining_time": "5:04:13"} +{"current_steps": 2649, "total_steps": 4048, "loss": 0.24829509854316711, "lr": 5.8593675979405795e-06, "epoch": 1.308908933646361, "percentage": 65.44, "elapsed_time": "9:35:37", "remaining_time": "5:04:00"} +{"current_steps": 2650, "total_steps": 4048, "loss": 0.29011303186416626, "lr": 5.851931714835016e-06, "epoch": 1.3094031879401953, "percentage": 65.46, "elapsed_time": "9:35:50", "remaining_time": "5:03:46"} +{"current_steps": 2651, "total_steps": 4048, "loss": 0.273196280002594, "lr": 5.8444986009205754e-06, "epoch": 1.3098974422340295, "percentage": 65.49, "elapsed_time": "9:36:03", "remaining_time": "5:03:33"} +{"current_steps": 2652, "total_steps": 4048, "loss": 0.28843480348587036, "lr": 5.837068261159491e-06, 
"epoch": 1.3103916965278635, "percentage": 65.51, "elapsed_time": "9:36:15", "remaining_time": "5:03:20"} +{"current_steps": 2653, "total_steps": 4048, "loss": 0.25919461250305176, "lr": 5.829640700512159e-06, "epoch": 1.3108859508216977, "percentage": 65.54, "elapsed_time": "9:36:28", "remaining_time": "5:03:07"} +{"current_steps": 2654, "total_steps": 4048, "loss": 0.24588480591773987, "lr": 5.822215923937105e-06, "epoch": 1.3113802051155319, "percentage": 65.56, "elapsed_time": "9:36:41", "remaining_time": "5:02:54"} +{"current_steps": 2655, "total_steps": 4048, "loss": 0.26138943433761597, "lr": 5.814793936391001e-06, "epoch": 1.311874459409366, "percentage": 65.59, "elapsed_time": "9:36:54", "remaining_time": "5:02:41"} +{"current_steps": 2656, "total_steps": 4048, "loss": 0.2740943729877472, "lr": 5.807374742828675e-06, "epoch": 1.3123687137032003, "percentage": 65.61, "elapsed_time": "9:37:07", "remaining_time": "5:02:28"} +{"current_steps": 2657, "total_steps": 4048, "loss": 0.2307349294424057, "lr": 5.7999583482030605e-06, "epoch": 1.3128629679970345, "percentage": 65.64, "elapsed_time": "9:37:20", "remaining_time": "5:02:15"} +{"current_steps": 2658, "total_steps": 4048, "loss": 0.28424161672592163, "lr": 5.792544757465242e-06, "epoch": 1.3133572222908687, "percentage": 65.66, "elapsed_time": "9:37:33", "remaining_time": "5:02:01"} +{"current_steps": 2659, "total_steps": 4048, "loss": 0.2586106061935425, "lr": 5.785133975564426e-06, "epoch": 1.313851476584703, "percentage": 65.69, "elapsed_time": "9:37:46", "remaining_time": "5:01:49"} +{"current_steps": 2660, "total_steps": 4048, "loss": 0.23268333077430725, "lr": 5.7777260074479455e-06, "epoch": 1.3143457308785371, "percentage": 65.71, "elapsed_time": "9:37:59", "remaining_time": "5:01:36"} +{"current_steps": 2661, "total_steps": 4048, "loss": 0.22144779562950134, "lr": 5.770320858061254e-06, "epoch": 1.314839985172371, "percentage": 65.74, "elapsed_time": "9:38:12", "remaining_time": "5:01:23"} 
+{"current_steps": 2662, "total_steps": 4048, "loss": 0.2450334131717682, "lr": 5.762918532347925e-06, "epoch": 1.3153342394662053, "percentage": 65.76, "elapsed_time": "9:38:25", "remaining_time": "5:01:09"} +{"current_steps": 2663, "total_steps": 4048, "loss": 0.24483400583267212, "lr": 5.7555190352496375e-06, "epoch": 1.3158284937600395, "percentage": 65.79, "elapsed_time": "9:38:38", "remaining_time": "5:00:56"} +{"current_steps": 2664, "total_steps": 4048, "loss": 0.2590720057487488, "lr": 5.748122371706198e-06, "epoch": 1.3163227480538737, "percentage": 65.81, "elapsed_time": "9:38:51", "remaining_time": "5:00:43"} +{"current_steps": 2665, "total_steps": 4048, "loss": 0.27116847038269043, "lr": 5.740728546655515e-06, "epoch": 1.316817002347708, "percentage": 65.83, "elapsed_time": "9:39:04", "remaining_time": "5:00:30"} +{"current_steps": 2666, "total_steps": 4048, "loss": 0.2720273435115814, "lr": 5.733337565033595e-06, "epoch": 1.3173112566415421, "percentage": 65.86, "elapsed_time": "9:39:17", "remaining_time": "5:00:17"} +{"current_steps": 2667, "total_steps": 4048, "loss": 0.22150173783302307, "lr": 5.7259494317745514e-06, "epoch": 1.3178055109353761, "percentage": 65.88, "elapsed_time": "9:39:31", "remaining_time": "5:00:04"} +{"current_steps": 2668, "total_steps": 4048, "loss": 0.27474984526634216, "lr": 5.718564151810597e-06, "epoch": 1.3182997652292103, "percentage": 65.91, "elapsed_time": "9:39:43", "remaining_time": "4:59:51"} +{"current_steps": 2669, "total_steps": 4048, "loss": 0.2547265291213989, "lr": 5.711181730072044e-06, "epoch": 1.3187940195230445, "percentage": 65.93, "elapsed_time": "9:39:56", "remaining_time": "4:59:38"} +{"current_steps": 2670, "total_steps": 4048, "loss": 0.2686036229133606, "lr": 5.703802171487286e-06, "epoch": 1.3192882738168787, "percentage": 65.96, "elapsed_time": "9:40:10", "remaining_time": "4:59:25"} +{"current_steps": 2671, "total_steps": 4048, "loss": 0.2276458591222763, "lr": 5.696425480982814e-06, "epoch": 
1.319782528110713, "percentage": 65.98, "elapsed_time": "9:40:22", "remaining_time": "4:59:12"} +{"current_steps": 2672, "total_steps": 4048, "loss": 0.25005075335502625, "lr": 5.6890516634832e-06, "epoch": 1.3202767824045472, "percentage": 66.01, "elapsed_time": "9:40:35", "remaining_time": "4:58:59"} +{"current_steps": 2673, "total_steps": 4048, "loss": 0.25919869542121887, "lr": 5.681680723911104e-06, "epoch": 1.3207710366983814, "percentage": 66.03, "elapsed_time": "9:40:48", "remaining_time": "4:58:46"} +{"current_steps": 2674, "total_steps": 4048, "loss": 0.2684757709503174, "lr": 5.6743126671872505e-06, "epoch": 1.3212652909922156, "percentage": 66.06, "elapsed_time": "9:41:02", "remaining_time": "4:58:33"} +{"current_steps": 2675, "total_steps": 4048, "loss": 0.2554991543292999, "lr": 5.666947498230451e-06, "epoch": 1.3217595452860498, "percentage": 66.08, "elapsed_time": "9:41:14", "remaining_time": "4:58:20"} +{"current_steps": 2676, "total_steps": 4048, "loss": 0.27026665210723877, "lr": 5.6595852219575975e-06, "epoch": 1.322253799579884, "percentage": 66.11, "elapsed_time": "9:41:28", "remaining_time": "4:58:07"} +{"current_steps": 2677, "total_steps": 4048, "loss": 0.3248092234134674, "lr": 5.652225843283629e-06, "epoch": 1.322748053873718, "percentage": 66.13, "elapsed_time": "9:41:40", "remaining_time": "4:57:54"} +{"current_steps": 2678, "total_steps": 4048, "loss": 0.2554503083229065, "lr": 5.644869367121564e-06, "epoch": 1.3232423081675522, "percentage": 66.16, "elapsed_time": "9:41:54", "remaining_time": "4:57:41"} +{"current_steps": 2679, "total_steps": 4048, "loss": 0.25482693314552307, "lr": 5.637515798382488e-06, "epoch": 1.3237365624613864, "percentage": 66.18, "elapsed_time": "9:42:07", "remaining_time": "4:57:28"} +{"current_steps": 2680, "total_steps": 4048, "loss": 0.24664446711540222, "lr": 5.630165141975523e-06, "epoch": 1.3242308167552206, "percentage": 66.21, "elapsed_time": "9:42:20", "remaining_time": "4:57:15"} +{"current_steps": 
2681, "total_steps": 4048, "loss": 0.23855865001678467, "lr": 5.622817402807879e-06, "epoch": 1.3247250710490548, "percentage": 66.23, "elapsed_time": "9:42:33", "remaining_time": "4:57:02"} +{"current_steps": 2682, "total_steps": 4048, "loss": 0.2847699820995331, "lr": 5.615472585784796e-06, "epoch": 1.325219325342889, "percentage": 66.25, "elapsed_time": "9:42:46", "remaining_time": "4:56:49"} +{"current_steps": 2683, "total_steps": 4048, "loss": 0.2705647051334381, "lr": 5.608130695809564e-06, "epoch": 1.325713579636723, "percentage": 66.28, "elapsed_time": "9:42:59", "remaining_time": "4:56:36"} +{"current_steps": 2684, "total_steps": 4048, "loss": 0.30135318636894226, "lr": 5.600791737783523e-06, "epoch": 1.3262078339305572, "percentage": 66.3, "elapsed_time": "9:43:12", "remaining_time": "4:56:23"} +{"current_steps": 2685, "total_steps": 4048, "loss": 0.261536180973053, "lr": 5.593455716606069e-06, "epoch": 1.3267020882243914, "percentage": 66.33, "elapsed_time": "9:43:26", "remaining_time": "4:56:10"} +{"current_steps": 2686, "total_steps": 4048, "loss": 0.24006187915802002, "lr": 5.586122637174614e-06, "epoch": 1.3271963425182256, "percentage": 66.35, "elapsed_time": "9:43:38", "remaining_time": "4:55:57"} +{"current_steps": 2687, "total_steps": 4048, "loss": 0.27928346395492554, "lr": 5.578792504384618e-06, "epoch": 1.3276905968120598, "percentage": 66.38, "elapsed_time": "9:43:52", "remaining_time": "4:55:44"} +{"current_steps": 2688, "total_steps": 4048, "loss": 0.24134980142116547, "lr": 5.5714653231295745e-06, "epoch": 1.328184851105894, "percentage": 66.4, "elapsed_time": "9:44:04", "remaining_time": "4:55:31"} +{"current_steps": 2689, "total_steps": 4048, "loss": 0.27914801239967346, "lr": 5.5641410983010055e-06, "epoch": 1.3286791053997282, "percentage": 66.43, "elapsed_time": "9:44:18", "remaining_time": "4:55:18"} +{"current_steps": 2690, "total_steps": 4048, "loss": 0.2735476493835449, "lr": 5.55681983478846e-06, "epoch": 1.3291733596935624, 
"percentage": 66.45, "elapsed_time": "9:44:30", "remaining_time": "4:55:04"} +{"current_steps": 2691, "total_steps": 4048, "loss": 0.24919739365577698, "lr": 5.549501537479511e-06, "epoch": 1.3296676139873966, "percentage": 66.48, "elapsed_time": "9:44:44", "remaining_time": "4:54:51"} +{"current_steps": 2692, "total_steps": 4048, "loss": 0.25435787439346313, "lr": 5.542186211259737e-06, "epoch": 1.3301618682812306, "percentage": 66.5, "elapsed_time": "9:44:56", "remaining_time": "4:54:38"} +{"current_steps": 2693, "total_steps": 4048, "loss": 0.2502862811088562, "lr": 5.534873861012763e-06, "epoch": 1.3306561225750648, "percentage": 66.53, "elapsed_time": "9:45:10", "remaining_time": "4:54:25"} +{"current_steps": 2694, "total_steps": 4048, "loss": 0.25752580165863037, "lr": 5.527564491620195e-06, "epoch": 1.331150376868899, "percentage": 66.55, "elapsed_time": "9:45:23", "remaining_time": "4:54:12"} +{"current_steps": 2695, "total_steps": 4048, "loss": 0.22301846742630005, "lr": 5.520258107961671e-06, "epoch": 1.3316446311627332, "percentage": 66.58, "elapsed_time": "9:45:36", "remaining_time": "4:53:59"} +{"current_steps": 2696, "total_steps": 4048, "loss": 0.24581964313983917, "lr": 5.512954714914825e-06, "epoch": 1.3321388854565674, "percentage": 66.6, "elapsed_time": "9:45:49", "remaining_time": "4:53:46"} +{"current_steps": 2697, "total_steps": 4048, "loss": 0.271970272064209, "lr": 5.5056543173553e-06, "epoch": 1.3326331397504017, "percentage": 66.63, "elapsed_time": "9:46:02", "remaining_time": "4:53:33"} +{"current_steps": 2698, "total_steps": 4048, "loss": 0.23041053116321564, "lr": 5.498356920156735e-06, "epoch": 1.3331273940442356, "percentage": 66.65, "elapsed_time": "9:46:16", "remaining_time": "4:53:21"} +{"current_steps": 2699, "total_steps": 4048, "loss": 0.2338491678237915, "lr": 5.491062528190775e-06, "epoch": 1.3336216483380698, "percentage": 66.67, "elapsed_time": "9:46:29", "remaining_time": "4:53:08"} +{"current_steps": 2700, "total_steps": 
4048, "loss": 0.2667239010334015, "lr": 5.483771146327037e-06, "epoch": 1.334115902631904, "percentage": 66.7, "elapsed_time": "9:46:42", "remaining_time": "4:52:55"} +{"current_steps": 2701, "total_steps": 4048, "loss": 0.24761441349983215, "lr": 5.4764827794331586e-06, "epoch": 1.3346101569257383, "percentage": 66.72, "elapsed_time": "9:47:00", "remaining_time": "4:52:44"} +{"current_steps": 2702, "total_steps": 4048, "loss": 0.24087639153003693, "lr": 5.469197432374747e-06, "epoch": 1.3351044112195725, "percentage": 66.75, "elapsed_time": "9:47:13", "remaining_time": "4:52:31"} +{"current_steps": 2703, "total_steps": 4048, "loss": 0.26774898171424866, "lr": 5.461915110015386e-06, "epoch": 1.3355986655134067, "percentage": 66.77, "elapsed_time": "9:47:26", "remaining_time": "4:52:18"} +{"current_steps": 2704, "total_steps": 4048, "loss": 0.2820417284965515, "lr": 5.454635817216658e-06, "epoch": 1.3360929198072409, "percentage": 66.8, "elapsed_time": "9:47:40", "remaining_time": "4:52:05"} +{"current_steps": 2705, "total_steps": 4048, "loss": 0.2891086935997009, "lr": 5.447359558838113e-06, "epoch": 1.336587174101075, "percentage": 66.82, "elapsed_time": "9:47:52", "remaining_time": "4:51:52"} +{"current_steps": 2706, "total_steps": 4048, "loss": 0.24551361799240112, "lr": 5.440086339737277e-06, "epoch": 1.3370814283949093, "percentage": 66.85, "elapsed_time": "9:48:06", "remaining_time": "4:51:39"} +{"current_steps": 2707, "total_steps": 4048, "loss": 0.2293522208929062, "lr": 5.432816164769648e-06, "epoch": 1.3375756826887435, "percentage": 66.87, "elapsed_time": "9:48:18", "remaining_time": "4:51:26"} +{"current_steps": 2708, "total_steps": 4048, "loss": 0.22325105965137482, "lr": 5.425549038788693e-06, "epoch": 1.3380699369825775, "percentage": 66.9, "elapsed_time": "9:48:31", "remaining_time": "4:51:13"} +{"current_steps": 2709, "total_steps": 4048, "loss": 0.2263861447572708, "lr": 5.4182849666458315e-06, "epoch": 1.3385641912764117, "percentage": 66.92, 
"elapsed_time": "9:48:44", "remaining_time": "4:51:00"} +{"current_steps": 2710, "total_steps": 4048, "loss": 0.26902303099632263, "lr": 5.411023953190466e-06, "epoch": 1.339058445570246, "percentage": 66.95, "elapsed_time": "9:48:57", "remaining_time": "4:50:47"} +{"current_steps": 2711, "total_steps": 4048, "loss": 0.26154825091362, "lr": 5.403766003269944e-06, "epoch": 1.33955269986408, "percentage": 66.97, "elapsed_time": "9:49:10", "remaining_time": "4:50:33"} +{"current_steps": 2712, "total_steps": 4048, "loss": 0.2878270745277405, "lr": 5.396511121729562e-06, "epoch": 1.3400469541579143, "percentage": 67.0, "elapsed_time": "9:49:23", "remaining_time": "4:50:21"} +{"current_steps": 2713, "total_steps": 4048, "loss": 0.26206687092781067, "lr": 5.389259313412581e-06, "epoch": 1.3405412084517483, "percentage": 67.02, "elapsed_time": "9:49:36", "remaining_time": "4:50:07"} +{"current_steps": 2714, "total_steps": 4048, "loss": 0.25612518191337585, "lr": 5.382010583160201e-06, "epoch": 1.3410354627455825, "percentage": 67.05, "elapsed_time": "9:49:49", "remaining_time": "4:49:54"} +{"current_steps": 2715, "total_steps": 4048, "loss": 0.25600868463516235, "lr": 5.374764935811574e-06, "epoch": 1.3415297170394167, "percentage": 67.07, "elapsed_time": "9:50:02", "remaining_time": "4:49:41"} +{"current_steps": 2716, "total_steps": 4048, "loss": 0.24837616086006165, "lr": 5.367522376203787e-06, "epoch": 1.342023971333251, "percentage": 67.09, "elapsed_time": "9:50:15", "remaining_time": "4:49:28"} +{"current_steps": 2717, "total_steps": 4048, "loss": 0.23487885296344757, "lr": 5.360282909171875e-06, "epoch": 1.3425182256270851, "percentage": 67.12, "elapsed_time": "9:50:28", "remaining_time": "4:49:15"} +{"current_steps": 2718, "total_steps": 4048, "loss": 0.22786842286586761, "lr": 5.353046539548797e-06, "epoch": 1.3430124799209193, "percentage": 67.14, "elapsed_time": "9:50:40", "remaining_time": "4:49:02"} +{"current_steps": 2719, "total_steps": 4048, "loss": 
0.2198137640953064, "lr": 5.3458132721654564e-06, "epoch": 1.3435067342147535, "percentage": 67.17, "elapsed_time": "9:50:54", "remaining_time": "4:48:49"} +{"current_steps": 2720, "total_steps": 4048, "loss": 0.20056495070457458, "lr": 5.338583111850671e-06, "epoch": 1.3440009885085877, "percentage": 67.19, "elapsed_time": "9:51:07", "remaining_time": "4:48:36"} +{"current_steps": 2721, "total_steps": 4048, "loss": 0.21636295318603516, "lr": 5.331356063431195e-06, "epoch": 1.344495242802422, "percentage": 67.22, "elapsed_time": "9:51:21", "remaining_time": "4:48:23"} +{"current_steps": 2722, "total_steps": 4048, "loss": 0.23933230340480804, "lr": 5.32413213173171e-06, "epoch": 1.3449894970962561, "percentage": 67.24, "elapsed_time": "9:51:33", "remaining_time": "4:48:10"} +{"current_steps": 2723, "total_steps": 4048, "loss": 0.2402106523513794, "lr": 5.316911321574799e-06, "epoch": 1.3454837513900901, "percentage": 67.27, "elapsed_time": "9:51:47", "remaining_time": "4:47:57"} +{"current_steps": 2724, "total_steps": 4048, "loss": 0.22524669766426086, "lr": 5.309693637780979e-06, "epoch": 1.3459780056839243, "percentage": 67.29, "elapsed_time": "9:51:59", "remaining_time": "4:47:44"} +{"current_steps": 2725, "total_steps": 4048, "loss": 0.25381600856781006, "lr": 5.302479085168668e-06, "epoch": 1.3464722599777585, "percentage": 67.32, "elapsed_time": "9:52:13", "remaining_time": "4:47:31"} +{"current_steps": 2726, "total_steps": 4048, "loss": 0.2614738643169403, "lr": 5.295267668554202e-06, "epoch": 1.3469665142715928, "percentage": 67.34, "elapsed_time": "9:52:26", "remaining_time": "4:47:18"} +{"current_steps": 2727, "total_steps": 4048, "loss": 0.2701472043991089, "lr": 5.288059392751817e-06, "epoch": 1.347460768565427, "percentage": 67.37, "elapsed_time": "9:52:39", "remaining_time": "4:47:05"} +{"current_steps": 2728, "total_steps": 4048, "loss": 0.2788996696472168, "lr": 5.280854262573661e-06, "epoch": 1.3479550228592612, "percentage": 67.39, "elapsed_time": 
"9:52:52", "remaining_time": "4:46:52"} +{"current_steps": 2729, "total_steps": 4048, "loss": 0.2419927418231964, "lr": 5.273652282829764e-06, "epoch": 1.3484492771530951, "percentage": 67.42, "elapsed_time": "9:53:05", "remaining_time": "4:46:39"} +{"current_steps": 2730, "total_steps": 4048, "loss": 0.26454097032546997, "lr": 5.266453458328071e-06, "epoch": 1.3489435314469294, "percentage": 67.44, "elapsed_time": "9:53:18", "remaining_time": "4:46:26"} +{"current_steps": 2731, "total_steps": 4048, "loss": 0.24090510606765747, "lr": 5.259257793874421e-06, "epoch": 1.3494377857407636, "percentage": 67.47, "elapsed_time": "9:53:31", "remaining_time": "4:46:13"} +{"current_steps": 2732, "total_steps": 4048, "loss": 0.27343428134918213, "lr": 5.252065294272528e-06, "epoch": 1.3499320400345978, "percentage": 67.49, "elapsed_time": "9:53:44", "remaining_time": "4:46:00"} +{"current_steps": 2733, "total_steps": 4048, "loss": 0.2623448967933655, "lr": 5.244875964324005e-06, "epoch": 1.350426294328432, "percentage": 67.51, "elapsed_time": "9:53:57", "remaining_time": "4:45:47"} +{"current_steps": 2734, "total_steps": 4048, "loss": 0.22721052169799805, "lr": 5.237689808828346e-06, "epoch": 1.3509205486222662, "percentage": 67.54, "elapsed_time": "9:54:10", "remaining_time": "4:45:34"} +{"current_steps": 2735, "total_steps": 4048, "loss": 0.26385387778282166, "lr": 5.230506832582924e-06, "epoch": 1.3514148029161004, "percentage": 67.56, "elapsed_time": "9:54:23", "remaining_time": "4:45:21"} +{"current_steps": 2736, "total_steps": 4048, "loss": 0.2679533064365387, "lr": 5.223327040382995e-06, "epoch": 1.3519090572099346, "percentage": 67.59, "elapsed_time": "9:54:36", "remaining_time": "4:45:08"} +{"current_steps": 2737, "total_steps": 4048, "loss": 0.25042447447776794, "lr": 5.2161504370216855e-06, "epoch": 1.3524033115037688, "percentage": 67.61, "elapsed_time": "9:54:49", "remaining_time": "4:44:54"} +{"current_steps": 2738, "total_steps": 4048, "loss": 
0.22735297679901123, "lr": 5.2089770272899845e-06, "epoch": 1.3528975657976028, "percentage": 67.64, "elapsed_time": "9:55:02", "remaining_time": "4:44:42"} +{"current_steps": 2739, "total_steps": 4048, "loss": 0.25517284870147705, "lr": 5.201806815976772e-06, "epoch": 1.353391820091437, "percentage": 67.66, "elapsed_time": "9:55:15", "remaining_time": "4:44:28"} +{"current_steps": 2740, "total_steps": 4048, "loss": 0.2942652702331543, "lr": 5.194639807868767e-06, "epoch": 1.3538860743852712, "percentage": 67.69, "elapsed_time": "9:55:28", "remaining_time": "4:44:15"} +{"current_steps": 2741, "total_steps": 4048, "loss": 0.2605661153793335, "lr": 5.187476007750567e-06, "epoch": 1.3543803286791054, "percentage": 67.71, "elapsed_time": "9:55:41", "remaining_time": "4:44:02"} +{"current_steps": 2742, "total_steps": 4048, "loss": 0.22976648807525635, "lr": 5.1803154204046215e-06, "epoch": 1.3548745829729396, "percentage": 67.74, "elapsed_time": "9:55:53", "remaining_time": "4:43:49"} +{"current_steps": 2743, "total_steps": 4048, "loss": 0.24301470816135406, "lr": 5.173158050611236e-06, "epoch": 1.3553688372667738, "percentage": 67.76, "elapsed_time": "9:56:07", "remaining_time": "4:43:36"} +{"current_steps": 2744, "total_steps": 4048, "loss": 0.2714199125766754, "lr": 5.166003903148568e-06, "epoch": 1.3558630915606078, "percentage": 67.79, "elapsed_time": "9:56:19", "remaining_time": "4:43:23"} +{"current_steps": 2745, "total_steps": 4048, "loss": 0.27004045248031616, "lr": 5.15885298279263e-06, "epoch": 1.356357345854442, "percentage": 67.81, "elapsed_time": "9:56:32", "remaining_time": "4:43:10"} +{"current_steps": 2746, "total_steps": 4048, "loss": 0.2062053680419922, "lr": 5.151705294317262e-06, "epoch": 1.3568516001482762, "percentage": 67.84, "elapsed_time": "9:56:45", "remaining_time": "4:42:57"} +{"current_steps": 2747, "total_steps": 4048, "loss": 0.2589803636074066, "lr": 5.144560842494168e-06, "epoch": 1.3573458544421104, "percentage": 67.86, "elapsed_time": 
"9:56:58", "remaining_time": "4:42:44"} +{"current_steps": 2748, "total_steps": 4048, "loss": 0.26469242572784424, "lr": 5.137419632092886e-06, "epoch": 1.3578401087359446, "percentage": 67.89, "elapsed_time": "9:57:11", "remaining_time": "4:42:30"} +{"current_steps": 2749, "total_steps": 4048, "loss": 0.26241326332092285, "lr": 5.130281667880774e-06, "epoch": 1.3583343630297788, "percentage": 67.91, "elapsed_time": "9:57:24", "remaining_time": "4:42:18"} +{"current_steps": 2750, "total_steps": 4048, "loss": 0.2674810290336609, "lr": 5.123146954623038e-06, "epoch": 1.358828617323613, "percentage": 67.93, "elapsed_time": "9:57:37", "remaining_time": "4:42:04"} +{"current_steps": 2751, "total_steps": 4048, "loss": 0.23186063766479492, "lr": 5.116015497082719e-06, "epoch": 1.3593228716174472, "percentage": 67.96, "elapsed_time": "9:57:50", "remaining_time": "4:41:51"} +{"current_steps": 2752, "total_steps": 4048, "loss": 0.2794165313243866, "lr": 5.108887300020669e-06, "epoch": 1.3598171259112815, "percentage": 67.98, "elapsed_time": "9:58:03", "remaining_time": "4:41:38"} +{"current_steps": 2753, "total_steps": 4048, "loss": 0.25263023376464844, "lr": 5.1017623681955705e-06, "epoch": 1.3603113802051157, "percentage": 68.01, "elapsed_time": "9:58:16", "remaining_time": "4:41:25"} +{"current_steps": 2754, "total_steps": 4048, "loss": 0.2503500282764435, "lr": 5.0946407063639315e-06, "epoch": 1.3608056344989496, "percentage": 68.03, "elapsed_time": "9:58:29", "remaining_time": "4:41:12"} +{"current_steps": 2755, "total_steps": 4048, "loss": 0.21871569752693176, "lr": 5.087522319280061e-06, "epoch": 1.3612998887927839, "percentage": 68.06, "elapsed_time": "9:58:42", "remaining_time": "4:40:59"} +{"current_steps": 2756, "total_steps": 4048, "loss": 0.2790142893791199, "lr": 5.080407211696103e-06, "epoch": 1.361794143086618, "percentage": 68.08, "elapsed_time": "9:58:55", "remaining_time": "4:40:46"} +{"current_steps": 2757, "total_steps": 4048, "loss": 
0.27197304368019104, "lr": 5.073295388362003e-06, "epoch": 1.3622883973804523, "percentage": 68.11, "elapsed_time": "9:59:08", "remaining_time": "4:40:33"} +{"current_steps": 2758, "total_steps": 4048, "loss": 0.2402152568101883, "lr": 5.066186854025502e-06, "epoch": 1.3627826516742865, "percentage": 68.13, "elapsed_time": "9:59:21", "remaining_time": "4:40:20"} +{"current_steps": 2759, "total_steps": 4048, "loss": 0.24418887495994568, "lr": 5.059081613432162e-06, "epoch": 1.3632769059681207, "percentage": 68.16, "elapsed_time": "9:59:34", "remaining_time": "4:40:07"} +{"current_steps": 2760, "total_steps": 4048, "loss": 0.2239491045475006, "lr": 5.05197967132534e-06, "epoch": 1.3637711602619547, "percentage": 68.18, "elapsed_time": "9:59:47", "remaining_time": "4:39:54"} +{"current_steps": 2761, "total_steps": 4048, "loss": 0.25177091360092163, "lr": 5.044881032446192e-06, "epoch": 1.3642654145557889, "percentage": 68.21, "elapsed_time": "9:59:59", "remaining_time": "4:39:40"} +{"current_steps": 2762, "total_steps": 4048, "loss": 0.25462138652801514, "lr": 5.0377857015336655e-06, "epoch": 1.364759668849623, "percentage": 68.23, "elapsed_time": "10:00:13", "remaining_time": "4:39:27"} +{"current_steps": 2763, "total_steps": 4048, "loss": 0.21030092239379883, "lr": 5.0306936833245034e-06, "epoch": 1.3652539231434573, "percentage": 68.26, "elapsed_time": "10:00:25", "remaining_time": "4:39:14"} +{"current_steps": 2764, "total_steps": 4048, "loss": 0.24033348262310028, "lr": 5.0236049825532355e-06, "epoch": 1.3657481774372915, "percentage": 68.28, "elapsed_time": "10:00:39", "remaining_time": "4:39:01"} +{"current_steps": 2765, "total_steps": 4048, "loss": 0.20803815126419067, "lr": 5.016519603952177e-06, "epoch": 1.3662424317311257, "percentage": 68.31, "elapsed_time": "10:00:52", "remaining_time": "4:38:48"} +{"current_steps": 2766, "total_steps": 4048, "loss": 0.21589599549770355, "lr": 5.00943755225143e-06, "epoch": 1.36673668602496, "percentage": 68.33, 
"elapsed_time": "10:01:05", "remaining_time": "4:38:35"} +{"current_steps": 2767, "total_steps": 4048, "loss": 0.2690975069999695, "lr": 5.00235883217886e-06, "epoch": 1.367230940318794, "percentage": 68.35, "elapsed_time": "10:01:17", "remaining_time": "4:38:22"} +{"current_steps": 2768, "total_steps": 4048, "loss": 0.2368423044681549, "lr": 4.995283448460131e-06, "epoch": 1.3677251946126283, "percentage": 68.38, "elapsed_time": "10:01:31", "remaining_time": "4:38:09"} +{"current_steps": 2769, "total_steps": 4048, "loss": 0.2801262140274048, "lr": 4.988211405818661e-06, "epoch": 1.3682194489064623, "percentage": 68.4, "elapsed_time": "10:01:43", "remaining_time": "4:37:56"} +{"current_steps": 2770, "total_steps": 4048, "loss": 0.2777586877346039, "lr": 4.981142708975647e-06, "epoch": 1.3687137032002965, "percentage": 68.43, "elapsed_time": "10:01:56", "remaining_time": "4:37:42"} +{"current_steps": 2771, "total_steps": 4048, "loss": 0.2400980144739151, "lr": 4.97407736265005e-06, "epoch": 1.3692079574941307, "percentage": 68.45, "elapsed_time": "10:02:09", "remaining_time": "4:37:29"} +{"current_steps": 2772, "total_steps": 4048, "loss": 0.2513861358165741, "lr": 4.967015371558592e-06, "epoch": 1.369702211787965, "percentage": 68.48, "elapsed_time": "10:02:21", "remaining_time": "4:37:16"} +{"current_steps": 2773, "total_steps": 4048, "loss": 0.2785816490650177, "lr": 4.959956740415761e-06, "epoch": 1.3701964660817991, "percentage": 68.5, "elapsed_time": "10:02:35", "remaining_time": "4:37:03"} +{"current_steps": 2774, "total_steps": 4048, "loss": 0.29092347621917725, "lr": 4.9529014739338e-06, "epoch": 1.3706907203756333, "percentage": 68.53, "elapsed_time": "10:02:47", "remaining_time": "4:36:50"} +{"current_steps": 2775, "total_steps": 4048, "loss": 0.27067384123802185, "lr": 4.945849576822693e-06, "epoch": 1.3711849746694673, "percentage": 68.55, "elapsed_time": "10:03:00", "remaining_time": "4:36:37"} +{"current_steps": 2776, "total_steps": 4048, "loss": 
0.21500205993652344, "lr": 4.938801053790199e-06, "epoch": 1.3716792289633015, "percentage": 68.58, "elapsed_time": "10:03:13", "remaining_time": "4:36:24"} +{"current_steps": 2777, "total_steps": 4048, "loss": 0.2422936111688614, "lr": 4.931755909541808e-06, "epoch": 1.3721734832571357, "percentage": 68.6, "elapsed_time": "10:03:26", "remaining_time": "4:36:11"} +{"current_steps": 2778, "total_steps": 4048, "loss": 0.2760060727596283, "lr": 4.9247141487807515e-06, "epoch": 1.37266773755097, "percentage": 68.63, "elapsed_time": "10:03:38", "remaining_time": "4:35:57"} +{"current_steps": 2779, "total_steps": 4048, "loss": 0.22626326978206635, "lr": 4.917675776208013e-06, "epoch": 1.3731619918448041, "percentage": 68.65, "elapsed_time": "10:03:51", "remaining_time": "4:35:44"} +{"current_steps": 2780, "total_steps": 4048, "loss": 0.23023411631584167, "lr": 4.910640796522308e-06, "epoch": 1.3736562461386383, "percentage": 68.68, "elapsed_time": "10:04:04", "remaining_time": "4:35:31"} +{"current_steps": 2781, "total_steps": 4048, "loss": 0.22157053649425507, "lr": 4.903609214420088e-06, "epoch": 1.3741505004324726, "percentage": 68.7, "elapsed_time": "10:04:17", "remaining_time": "4:35:18"} +{"current_steps": 2782, "total_steps": 4048, "loss": 0.24125584959983826, "lr": 4.89658103459554e-06, "epoch": 1.3746447547263068, "percentage": 68.73, "elapsed_time": "10:04:29", "remaining_time": "4:35:05"} +{"current_steps": 2783, "total_steps": 4048, "loss": 0.26294079422950745, "lr": 4.889556261740578e-06, "epoch": 1.375139009020141, "percentage": 68.75, "elapsed_time": "10:04:42", "remaining_time": "4:34:52"} +{"current_steps": 2784, "total_steps": 4048, "loss": 0.25327497720718384, "lr": 4.882534900544829e-06, "epoch": 1.3756332633139752, "percentage": 68.77, "elapsed_time": "10:04:55", "remaining_time": "4:34:39"} +{"current_steps": 2785, "total_steps": 4048, "loss": 0.2716723084449768, "lr": 4.875516955695663e-06, "epoch": 1.3761275176078092, "percentage": 68.8, 
"elapsed_time": "10:05:08", "remaining_time": "4:34:25"} +{"current_steps": 2786, "total_steps": 4048, "loss": 0.2889532446861267, "lr": 4.8685024318781615e-06, "epoch": 1.3766217719016434, "percentage": 68.82, "elapsed_time": "10:05:21", "remaining_time": "4:34:12"} +{"current_steps": 2787, "total_steps": 4048, "loss": 0.23743030428886414, "lr": 4.861491333775114e-06, "epoch": 1.3771160261954776, "percentage": 68.85, "elapsed_time": "10:05:33", "remaining_time": "4:33:59"} +{"current_steps": 2788, "total_steps": 4048, "loss": 0.27180567383766174, "lr": 4.8544836660670305e-06, "epoch": 1.3776102804893118, "percentage": 68.87, "elapsed_time": "10:05:46", "remaining_time": "4:33:46"} +{"current_steps": 2789, "total_steps": 4048, "loss": 0.2549944221973419, "lr": 4.847479433432131e-06, "epoch": 1.378104534783146, "percentage": 68.9, "elapsed_time": "10:05:59", "remaining_time": "4:33:33"} +{"current_steps": 2790, "total_steps": 4048, "loss": 0.24112319946289062, "lr": 4.8404786405463414e-06, "epoch": 1.37859878907698, "percentage": 68.92, "elapsed_time": "10:06:12", "remaining_time": "4:33:20"} +{"current_steps": 2791, "total_steps": 4048, "loss": 0.22865869104862213, "lr": 4.833481292083291e-06, "epoch": 1.3790930433708142, "percentage": 68.95, "elapsed_time": "10:06:24", "remaining_time": "4:33:06"} +{"current_steps": 2792, "total_steps": 4048, "loss": 0.24851003289222717, "lr": 4.82648739271431e-06, "epoch": 1.3795872976646484, "percentage": 68.97, "elapsed_time": "10:06:38", "remaining_time": "4:32:53"} +{"current_steps": 2793, "total_steps": 4048, "loss": 0.251456081867218, "lr": 4.819496947108424e-06, "epoch": 1.3800815519584826, "percentage": 69.0, "elapsed_time": "10:06:50", "remaining_time": "4:32:40"} +{"current_steps": 2794, "total_steps": 4048, "loss": 0.31711041927337646, "lr": 4.81250995993236e-06, "epoch": 1.3805758062523168, "percentage": 69.02, "elapsed_time": "10:07:03", "remaining_time": "4:32:27"} +{"current_steps": 2795, "total_steps": 4048, 
"loss": 0.2204340100288391, "lr": 4.805526435850523e-06, "epoch": 1.381070060546151, "percentage": 69.05, "elapsed_time": "10:07:16", "remaining_time": "4:32:14"} +{"current_steps": 2796, "total_steps": 4048, "loss": 0.26289406418800354, "lr": 4.798546379525013e-06, "epoch": 1.3815643148399852, "percentage": 69.07, "elapsed_time": "10:07:29", "remaining_time": "4:32:01"} +{"current_steps": 2797, "total_steps": 4048, "loss": 0.24830611050128937, "lr": 4.7915697956156284e-06, "epoch": 1.3820585691338194, "percentage": 69.1, "elapsed_time": "10:07:42", "remaining_time": "4:31:48"} +{"current_steps": 2798, "total_steps": 4048, "loss": 0.24792183935642242, "lr": 4.784596688779825e-06, "epoch": 1.3825528234276536, "percentage": 69.12, "elapsed_time": "10:07:54", "remaining_time": "4:31:35"} +{"current_steps": 2799, "total_steps": 4048, "loss": 0.2689560651779175, "lr": 4.777627063672753e-06, "epoch": 1.3830470777214878, "percentage": 69.15, "elapsed_time": "10:08:07", "remaining_time": "4:31:21"} +{"current_steps": 2800, "total_steps": 4048, "loss": 0.24323254823684692, "lr": 4.770660924947238e-06, "epoch": 1.3835413320153218, "percentage": 69.17, "elapsed_time": "10:08:20", "remaining_time": "4:31:08"} +{"current_steps": 2801, "total_steps": 4048, "loss": 0.24404528737068176, "lr": 4.7636982772537645e-06, "epoch": 1.384035586309156, "percentage": 69.19, "elapsed_time": "10:08:39", "remaining_time": "4:30:58"} +{"current_steps": 2802, "total_steps": 4048, "loss": 0.23512448370456696, "lr": 4.7567391252405075e-06, "epoch": 1.3845298406029902, "percentage": 69.22, "elapsed_time": "10:08:52", "remaining_time": "4:30:45"} +{"current_steps": 2803, "total_steps": 4048, "loss": 0.26446110010147095, "lr": 4.749783473553297e-06, "epoch": 1.3850240948968244, "percentage": 69.24, "elapsed_time": "10:09:06", "remaining_time": "4:30:32"} +{"current_steps": 2804, "total_steps": 4048, "loss": 0.24630968272686005, "lr": 4.742831326835618e-06, "epoch": 1.3855183491906586, "percentage": 
69.27, "elapsed_time": "10:09:18", "remaining_time": "4:30:19"} +{"current_steps": 2805, "total_steps": 4048, "loss": 0.253492146730423, "lr": 4.735882689728628e-06, "epoch": 1.3860126034844928, "percentage": 69.29, "elapsed_time": "10:09:32", "remaining_time": "4:30:06"} +{"current_steps": 2806, "total_steps": 4048, "loss": 0.271090567111969, "lr": 4.7289375668711444e-06, "epoch": 1.3865068577783268, "percentage": 69.32, "elapsed_time": "10:09:45", "remaining_time": "4:29:53"} +{"current_steps": 2807, "total_steps": 4048, "loss": 0.24045832455158234, "lr": 4.721995962899625e-06, "epoch": 1.387001112072161, "percentage": 69.34, "elapsed_time": "10:09:58", "remaining_time": "4:29:40"} +{"current_steps": 2808, "total_steps": 4048, "loss": 0.2525935471057892, "lr": 4.715057882448187e-06, "epoch": 1.3874953663659952, "percentage": 69.37, "elapsed_time": "10:10:11", "remaining_time": "4:29:27"} +{"current_steps": 2809, "total_steps": 4048, "loss": 0.30852392315864563, "lr": 4.708123330148593e-06, "epoch": 1.3879896206598294, "percentage": 69.39, "elapsed_time": "10:10:25", "remaining_time": "4:29:14"} +{"current_steps": 2810, "total_steps": 4048, "loss": 0.2770250737667084, "lr": 4.701192310630253e-06, "epoch": 1.3884838749536637, "percentage": 69.42, "elapsed_time": "10:10:37", "remaining_time": "4:29:01"} +{"current_steps": 2811, "total_steps": 4048, "loss": 0.29135680198669434, "lr": 4.6942648285202154e-06, "epoch": 1.3889781292474979, "percentage": 69.44, "elapsed_time": "10:10:51", "remaining_time": "4:28:48"} +{"current_steps": 2812, "total_steps": 4048, "loss": 0.26933860778808594, "lr": 4.687340888443171e-06, "epoch": 1.389472383541332, "percentage": 69.47, "elapsed_time": "10:11:04", "remaining_time": "4:28:35"} +{"current_steps": 2813, "total_steps": 4048, "loss": 0.26089105010032654, "lr": 4.680420495021436e-06, "epoch": 1.3899666378351663, "percentage": 69.49, "elapsed_time": "10:11:17", "remaining_time": "4:28:22"} +{"current_steps": 2814, "total_steps": 
4048, "loss": 0.26031410694122314, "lr": 4.673503652874977e-06, "epoch": 1.3904608921290005, "percentage": 69.52, "elapsed_time": "10:11:29", "remaining_time": "4:28:09"} +{"current_steps": 2815, "total_steps": 4048, "loss": 0.2887076139450073, "lr": 4.6665903666213685e-06, "epoch": 1.3909551464228345, "percentage": 69.54, "elapsed_time": "10:11:43", "remaining_time": "4:27:56"} +{"current_steps": 2816, "total_steps": 4048, "loss": 0.2360706925392151, "lr": 4.6596806408758275e-06, "epoch": 1.3914494007166687, "percentage": 69.57, "elapsed_time": "10:11:56", "remaining_time": "4:27:43"} +{"current_steps": 2817, "total_steps": 4048, "loss": 0.22275522351264954, "lr": 4.652774480251186e-06, "epoch": 1.3919436550105029, "percentage": 69.59, "elapsed_time": "10:12:09", "remaining_time": "4:27:30"} +{"current_steps": 2818, "total_steps": 4048, "loss": 0.2425977736711502, "lr": 4.645871889357899e-06, "epoch": 1.392437909304337, "percentage": 69.61, "elapsed_time": "10:12:22", "remaining_time": "4:27:17"} +{"current_steps": 2819, "total_steps": 4048, "loss": 0.25219830870628357, "lr": 4.638972872804038e-06, "epoch": 1.3929321635981713, "percentage": 69.64, "elapsed_time": "10:12:35", "remaining_time": "4:27:04"} +{"current_steps": 2820, "total_steps": 4048, "loss": 0.28060346841812134, "lr": 4.6320774351952916e-06, "epoch": 1.3934264178920055, "percentage": 69.66, "elapsed_time": "10:12:48", "remaining_time": "4:26:51"} +{"current_steps": 2821, "total_steps": 4048, "loss": 0.2395240217447281, "lr": 4.625185581134942e-06, "epoch": 1.3939206721858395, "percentage": 69.69, "elapsed_time": "10:13:01", "remaining_time": "4:26:38"} +{"current_steps": 2822, "total_steps": 4048, "loss": 0.23622646927833557, "lr": 4.618297315223906e-06, "epoch": 1.3944149264796737, "percentage": 69.71, "elapsed_time": "10:13:15", "remaining_time": "4:26:25"} +{"current_steps": 2823, "total_steps": 4048, "loss": 0.2189474105834961, "lr": 4.611412642060692e-06, "epoch": 1.394909180773508, 
"percentage": 69.74, "elapsed_time": "10:13:28", "remaining_time": "4:26:12"} +{"current_steps": 2824, "total_steps": 4048, "loss": 0.266002357006073, "lr": 4.6045315662414e-06, "epoch": 1.395403435067342, "percentage": 69.76, "elapsed_time": "10:13:41", "remaining_time": "4:25:59"} +{"current_steps": 2825, "total_steps": 4048, "loss": 0.2402176856994629, "lr": 4.5976540923597425e-06, "epoch": 1.3958976893611763, "percentage": 69.79, "elapsed_time": "10:13:54", "remaining_time": "4:25:46"} +{"current_steps": 2826, "total_steps": 4048, "loss": 0.2493474781513214, "lr": 4.5907802250070235e-06, "epoch": 1.3963919436550105, "percentage": 69.81, "elapsed_time": "10:14:07", "remaining_time": "4:25:33"} +{"current_steps": 2827, "total_steps": 4048, "loss": 0.25716543197631836, "lr": 4.583909968772137e-06, "epoch": 1.3968861979488447, "percentage": 69.84, "elapsed_time": "10:14:20", "remaining_time": "4:25:20"} +{"current_steps": 2828, "total_steps": 4048, "loss": 0.29470473527908325, "lr": 4.57704332824157e-06, "epoch": 1.397380452242679, "percentage": 69.86, "elapsed_time": "10:14:33", "remaining_time": "4:25:07"} +{"current_steps": 2829, "total_steps": 4048, "loss": 0.28095656633377075, "lr": 4.570180307999394e-06, "epoch": 1.3978747065365131, "percentage": 69.89, "elapsed_time": "10:14:46", "remaining_time": "4:24:54"} +{"current_steps": 2830, "total_steps": 4048, "loss": 0.2351825088262558, "lr": 4.563320912627256e-06, "epoch": 1.3983689608303473, "percentage": 69.91, "elapsed_time": "10:14:59", "remaining_time": "4:24:41"} +{"current_steps": 2831, "total_steps": 4048, "loss": 0.25859856605529785, "lr": 4.556465146704399e-06, "epoch": 1.3988632151241813, "percentage": 69.94, "elapsed_time": "10:15:11", "remaining_time": "4:24:27"} +{"current_steps": 2832, "total_steps": 4048, "loss": 0.2503181993961334, "lr": 4.549613014807637e-06, "epoch": 1.3993574694180155, "percentage": 69.96, "elapsed_time": "10:15:25", "remaining_time": "4:24:15"} +{"current_steps": 2833, 
"total_steps": 4048, "loss": 0.26368820667266846, "lr": 4.542764521511345e-06, "epoch": 1.3998517237118497, "percentage": 69.99, "elapsed_time": "10:15:38", "remaining_time": "4:24:01"} +{"current_steps": 2834, "total_steps": 4048, "loss": 0.24077676236629486, "lr": 4.535919671387483e-06, "epoch": 1.400345978005684, "percentage": 70.01, "elapsed_time": "10:15:51", "remaining_time": "4:23:48"} +{"current_steps": 2835, "total_steps": 4048, "loss": 0.27042093873023987, "lr": 4.529078469005577e-06, "epoch": 1.4008402322995182, "percentage": 70.03, "elapsed_time": "10:16:04", "remaining_time": "4:23:35"} +{"current_steps": 2836, "total_steps": 4048, "loss": 0.2731306552886963, "lr": 4.5222409189327155e-06, "epoch": 1.4013344865933524, "percentage": 70.06, "elapsed_time": "10:16:17", "remaining_time": "4:23:22"} +{"current_steps": 2837, "total_steps": 4048, "loss": 0.2925037741661072, "lr": 4.515407025733548e-06, "epoch": 1.4018287408871863, "percentage": 70.08, "elapsed_time": "10:16:30", "remaining_time": "4:23:09"} +{"current_steps": 2838, "total_steps": 4048, "loss": 0.2927025556564331, "lr": 4.508576793970285e-06, "epoch": 1.4023229951810205, "percentage": 70.11, "elapsed_time": "10:16:43", "remaining_time": "4:22:56"} +{"current_steps": 2839, "total_steps": 4048, "loss": 0.26285338401794434, "lr": 4.5017502282026926e-06, "epoch": 1.4028172494748548, "percentage": 70.13, "elapsed_time": "10:16:57", "remaining_time": "4:22:44"} +{"current_steps": 2840, "total_steps": 4048, "loss": 0.22698873281478882, "lr": 4.49492733298809e-06, "epoch": 1.403311503768689, "percentage": 70.16, "elapsed_time": "10:17:10", "remaining_time": "4:22:31"} +{"current_steps": 2841, "total_steps": 4048, "loss": 0.24116170406341553, "lr": 4.488108112881339e-06, "epoch": 1.4038057580625232, "percentage": 70.18, "elapsed_time": "10:17:23", "remaining_time": "4:22:18"} +{"current_steps": 2842, "total_steps": 4048, "loss": 0.3211704194545746, "lr": 4.481292572434852e-06, "epoch": 
1.4043000123563574, "percentage": 70.21, "elapsed_time": "10:17:36", "remaining_time": "4:22:05"} +{"current_steps": 2843, "total_steps": 4048, "loss": 0.26634523272514343, "lr": 4.474480716198598e-06, "epoch": 1.4047942666501916, "percentage": 70.23, "elapsed_time": "10:17:50", "remaining_time": "4:21:52"} +{"current_steps": 2844, "total_steps": 4048, "loss": 0.24751242995262146, "lr": 4.467672548720066e-06, "epoch": 1.4052885209440258, "percentage": 70.26, "elapsed_time": "10:18:03", "remaining_time": "4:21:39"} +{"current_steps": 2845, "total_steps": 4048, "loss": 0.22031354904174805, "lr": 4.4608680745442915e-06, "epoch": 1.40578277523786, "percentage": 70.28, "elapsed_time": "10:18:16", "remaining_time": "4:21:26"} +{"current_steps": 2846, "total_steps": 4048, "loss": 0.2474634051322937, "lr": 4.454067298213847e-06, "epoch": 1.406277029531694, "percentage": 70.31, "elapsed_time": "10:18:29", "remaining_time": "4:21:13"} +{"current_steps": 2847, "total_steps": 4048, "loss": 0.2494845986366272, "lr": 4.4472702242688315e-06, "epoch": 1.4067712838255282, "percentage": 70.33, "elapsed_time": "10:18:43", "remaining_time": "4:21:00"} +{"current_steps": 2848, "total_steps": 4048, "loss": 0.23150494694709778, "lr": 4.440476857246876e-06, "epoch": 1.4072655381193624, "percentage": 70.36, "elapsed_time": "10:18:56", "remaining_time": "4:20:47"} +{"current_steps": 2849, "total_steps": 4048, "loss": 0.2093413770198822, "lr": 4.433687201683138e-06, "epoch": 1.4077597924131966, "percentage": 70.38, "elapsed_time": "10:19:09", "remaining_time": "4:20:34"} +{"current_steps": 2850, "total_steps": 4048, "loss": 0.26741865277290344, "lr": 4.426901262110287e-06, "epoch": 1.4082540467070308, "percentage": 70.41, "elapsed_time": "10:19:22", "remaining_time": "4:20:21"} +{"current_steps": 2851, "total_steps": 4048, "loss": 0.2599044740200043, "lr": 4.420119043058521e-06, "epoch": 1.408748301000865, "percentage": 70.43, "elapsed_time": "10:19:35", "remaining_time": "4:20:08"} 
+{"current_steps": 2852, "total_steps": 4048, "loss": 0.26934683322906494, "lr": 4.413340549055562e-06, "epoch": 1.409242555294699, "percentage": 70.45, "elapsed_time": "10:19:48", "remaining_time": "4:19:55"} +{"current_steps": 2853, "total_steps": 4048, "loss": 0.2609720528125763, "lr": 4.4065657846266255e-06, "epoch": 1.4097368095885332, "percentage": 70.48, "elapsed_time": "10:20:01", "remaining_time": "4:19:42"} +{"current_steps": 2854, "total_steps": 4048, "loss": 0.23431813716888428, "lr": 4.39979475429445e-06, "epoch": 1.4102310638823674, "percentage": 70.5, "elapsed_time": "10:20:15", "remaining_time": "4:19:29"} +{"current_steps": 2855, "total_steps": 4048, "loss": 0.2791878581047058, "lr": 4.39302746257928e-06, "epoch": 1.4107253181762016, "percentage": 70.53, "elapsed_time": "10:20:27", "remaining_time": "4:19:16"} +{"current_steps": 2856, "total_steps": 4048, "loss": 0.30482247471809387, "lr": 4.386263913998862e-06, "epoch": 1.4112195724700358, "percentage": 70.55, "elapsed_time": "10:20:41", "remaining_time": "4:19:03"} +{"current_steps": 2857, "total_steps": 4048, "loss": 0.24561305344104767, "lr": 4.379504113068445e-06, "epoch": 1.41171382676387, "percentage": 70.58, "elapsed_time": "10:20:54", "remaining_time": "4:18:50"} +{"current_steps": 2858, "total_steps": 4048, "loss": 0.23973286151885986, "lr": 4.372748064300777e-06, "epoch": 1.4122080810577042, "percentage": 70.6, "elapsed_time": "10:21:07", "remaining_time": "4:18:37"} +{"current_steps": 2859, "total_steps": 4048, "loss": 0.26788556575775146, "lr": 4.365995772206092e-06, "epoch": 1.4127023353515384, "percentage": 70.63, "elapsed_time": "10:21:20", "remaining_time": "4:18:24"} +{"current_steps": 2860, "total_steps": 4048, "loss": 0.22432288527488708, "lr": 4.359247241292136e-06, "epoch": 1.4131965896453726, "percentage": 70.65, "elapsed_time": "10:21:34", "remaining_time": "4:18:11"} +{"current_steps": 2861, "total_steps": 4048, "loss": 0.282687783241272, "lr": 4.352502476064121e-06, 
"epoch": 1.4136908439392069, "percentage": 70.68, "elapsed_time": "10:21:47", "remaining_time": "4:17:58"} +{"current_steps": 2862, "total_steps": 4048, "loss": 0.2516692578792572, "lr": 4.345761481024761e-06, "epoch": 1.4141850982330408, "percentage": 70.7, "elapsed_time": "10:22:00", "remaining_time": "4:17:45"} +{"current_steps": 2863, "total_steps": 4048, "loss": 0.2473583221435547, "lr": 4.3390242606742465e-06, "epoch": 1.414679352526875, "percentage": 70.73, "elapsed_time": "10:22:13", "remaining_time": "4:17:32"} +{"current_steps": 2864, "total_steps": 4048, "loss": 0.24372908473014832, "lr": 4.33229081951025e-06, "epoch": 1.4151736068207093, "percentage": 70.75, "elapsed_time": "10:22:27", "remaining_time": "4:17:19"} +{"current_steps": 2865, "total_steps": 4048, "loss": 0.2877897024154663, "lr": 4.325561162027922e-06, "epoch": 1.4156678611145435, "percentage": 70.78, "elapsed_time": "10:22:40", "remaining_time": "4:17:06"} +{"current_steps": 2866, "total_steps": 4048, "loss": 0.2554720342159271, "lr": 4.318835292719886e-06, "epoch": 1.4161621154083777, "percentage": 70.8, "elapsed_time": "10:22:53", "remaining_time": "4:16:53"} +{"current_steps": 2867, "total_steps": 4048, "loss": 0.26695260405540466, "lr": 4.312113216076228e-06, "epoch": 1.4166563697022119, "percentage": 70.83, "elapsed_time": "10:23:06", "remaining_time": "4:16:40"} +{"current_steps": 2868, "total_steps": 4048, "loss": 0.26983851194381714, "lr": 4.305394936584522e-06, "epoch": 1.4171506239960459, "percentage": 70.85, "elapsed_time": "10:23:19", "remaining_time": "4:16:27"} +{"current_steps": 2869, "total_steps": 4048, "loss": 0.303170382976532, "lr": 4.298680458729793e-06, "epoch": 1.41764487828988, "percentage": 70.87, "elapsed_time": "10:23:32", "remaining_time": "4:16:14"} +{"current_steps": 2870, "total_steps": 4048, "loss": 0.23217584192752838, "lr": 4.2919697869945234e-06, "epoch": 1.4181391325837143, "percentage": 70.9, "elapsed_time": "10:23:45", "remaining_time": "4:16:01"} 
+{"current_steps": 2871, "total_steps": 4048, "loss": 0.2895517349243164, "lr": 4.285262925858663e-06, "epoch": 1.4186333868775485, "percentage": 70.92, "elapsed_time": "10:23:59", "remaining_time": "4:15:48"} +{"current_steps": 2872, "total_steps": 4048, "loss": 0.24025630950927734, "lr": 4.278559879799628e-06, "epoch": 1.4191276411713827, "percentage": 70.95, "elapsed_time": "10:24:11", "remaining_time": "4:15:35"} +{"current_steps": 2873, "total_steps": 4048, "loss": 0.22810839116573334, "lr": 4.271860653292263e-06, "epoch": 1.4196218954652169, "percentage": 70.97, "elapsed_time": "10:24:25", "remaining_time": "4:15:22"} +{"current_steps": 2874, "total_steps": 4048, "loss": 0.266724169254303, "lr": 4.26516525080888e-06, "epoch": 1.420116149759051, "percentage": 71.0, "elapsed_time": "10:24:37", "remaining_time": "4:15:09"} +{"current_steps": 2875, "total_steps": 4048, "loss": 0.22618745267391205, "lr": 4.25847367681924e-06, "epoch": 1.4206104040528853, "percentage": 71.02, "elapsed_time": "10:24:51", "remaining_time": "4:14:56"} +{"current_steps": 2876, "total_steps": 4048, "loss": 0.2239789217710495, "lr": 4.251785935790529e-06, "epoch": 1.4211046583467195, "percentage": 71.05, "elapsed_time": "10:25:04", "remaining_time": "4:14:43"} +{"current_steps": 2877, "total_steps": 4048, "loss": 0.21519358456134796, "lr": 4.245102032187399e-06, "epoch": 1.4215989126405535, "percentage": 71.07, "elapsed_time": "10:25:17", "remaining_time": "4:14:30"} +{"current_steps": 2878, "total_steps": 4048, "loss": 0.31226712465286255, "lr": 4.2384219704719284e-06, "epoch": 1.4220931669343877, "percentage": 71.1, "elapsed_time": "10:25:31", "remaining_time": "4:14:17"} +{"current_steps": 2879, "total_steps": 4048, "loss": 0.26814836263656616, "lr": 4.231745755103625e-06, "epoch": 1.422587421228222, "percentage": 71.12, "elapsed_time": "10:25:44", "remaining_time": "4:14:04"} +{"current_steps": 2880, "total_steps": 4048, "loss": 0.2369621843099594, "lr": 4.225073390539436e-06, 
"epoch": 1.423081675522056, "percentage": 71.15, "elapsed_time": "10:25:57", "remaining_time": "4:13:51"} +{"current_steps": 2881, "total_steps": 4048, "loss": 0.2556746304035187, "lr": 4.218404881233737e-06, "epoch": 1.4235759298158903, "percentage": 71.17, "elapsed_time": "10:26:10", "remaining_time": "4:13:38"} +{"current_steps": 2882, "total_steps": 4048, "loss": 0.25875598192214966, "lr": 4.2117402316383314e-06, "epoch": 1.4240701841097245, "percentage": 71.2, "elapsed_time": "10:26:24", "remaining_time": "4:13:25"} +{"current_steps": 2883, "total_steps": 4048, "loss": 0.26839762926101685, "lr": 4.205079446202443e-06, "epoch": 1.4245644384035585, "percentage": 71.22, "elapsed_time": "10:26:37", "remaining_time": "4:13:12"} +{"current_steps": 2884, "total_steps": 4048, "loss": 0.2764383554458618, "lr": 4.198422529372717e-06, "epoch": 1.4250586926973927, "percentage": 71.25, "elapsed_time": "10:26:50", "remaining_time": "4:12:59"} +{"current_steps": 2885, "total_steps": 4048, "loss": 0.24517112970352173, "lr": 4.191769485593216e-06, "epoch": 1.425552946991227, "percentage": 71.27, "elapsed_time": "10:27:03", "remaining_time": "4:12:46"} +{"current_steps": 2886, "total_steps": 4048, "loss": 0.21880990266799927, "lr": 4.18512031930542e-06, "epoch": 1.4260472012850611, "percentage": 71.29, "elapsed_time": "10:27:16", "remaining_time": "4:12:33"} +{"current_steps": 2887, "total_steps": 4048, "loss": 0.24671246111392975, "lr": 4.178475034948212e-06, "epoch": 1.4265414555788953, "percentage": 71.32, "elapsed_time": "10:27:29", "remaining_time": "4:12:20"} +{"current_steps": 2888, "total_steps": 4048, "loss": 0.25473371148109436, "lr": 4.171833636957886e-06, "epoch": 1.4270357098727295, "percentage": 71.34, "elapsed_time": "10:27:43", "remaining_time": "4:12:07"} +{"current_steps": 2889, "total_steps": 4048, "loss": 0.2675618529319763, "lr": 4.1651961297681574e-06, "epoch": 1.4275299641665637, "percentage": 71.37, "elapsed_time": "10:27:55", "remaining_time": 
"4:11:54"} +{"current_steps": 2890, "total_steps": 4048, "loss": 0.24357986450195312, "lr": 4.15856251781012e-06, "epoch": 1.428024218460398, "percentage": 71.39, "elapsed_time": "10:28:08", "remaining_time": "4:11:41"} +{"current_steps": 2891, "total_steps": 4048, "loss": 0.2668409049510956, "lr": 4.1519328055122825e-06, "epoch": 1.4285184727542322, "percentage": 71.42, "elapsed_time": "10:28:21", "remaining_time": "4:11:28"} +{"current_steps": 2892, "total_steps": 4048, "loss": 0.24507637321949005, "lr": 4.145306997300543e-06, "epoch": 1.4290127270480664, "percentage": 71.44, "elapsed_time": "10:28:33", "remaining_time": "4:11:15"} +{"current_steps": 2893, "total_steps": 4048, "loss": 0.2791709899902344, "lr": 4.1386850975982e-06, "epoch": 1.4295069813419004, "percentage": 71.47, "elapsed_time": "10:28:47", "remaining_time": "4:11:02"} +{"current_steps": 2894, "total_steps": 4048, "loss": 0.24982133507728577, "lr": 4.132067110825939e-06, "epoch": 1.4300012356357346, "percentage": 71.49, "elapsed_time": "10:28:59", "remaining_time": "4:10:49"} +{"current_steps": 2895, "total_steps": 4048, "loss": 0.2814679741859436, "lr": 4.125453041401835e-06, "epoch": 1.4304954899295688, "percentage": 71.52, "elapsed_time": "10:29:12", "remaining_time": "4:10:35"} +{"current_steps": 2896, "total_steps": 4048, "loss": 0.22699782252311707, "lr": 4.118842893741336e-06, "epoch": 1.430989744223403, "percentage": 71.54, "elapsed_time": "10:29:25", "remaining_time": "4:10:22"} +{"current_steps": 2897, "total_steps": 4048, "loss": 0.23297230899333954, "lr": 4.112236672257294e-06, "epoch": 1.4314839985172372, "percentage": 71.57, "elapsed_time": "10:29:38", "remaining_time": "4:10:09"} +{"current_steps": 2898, "total_steps": 4048, "loss": 0.26085159182548523, "lr": 4.1056343813599265e-06, "epoch": 1.4319782528110712, "percentage": 71.59, "elapsed_time": "10:29:51", "remaining_time": "4:09:56"} +{"current_steps": 2899, "total_steps": 4048, "loss": 0.27813559770584106, "lr": 
4.0990360254568216e-06, "epoch": 1.4324725071049054, "percentage": 71.62, "elapsed_time": "10:30:04", "remaining_time": "4:09:43"} +{"current_steps": 2900, "total_steps": 4048, "loss": 0.2821611762046814, "lr": 4.092441608952953e-06, "epoch": 1.4329667613987396, "percentage": 71.64, "elapsed_time": "10:30:16", "remaining_time": "4:09:30"} +{"current_steps": 2901, "total_steps": 4048, "loss": 0.25223150849342346, "lr": 4.085851136250657e-06, "epoch": 1.4334610156925738, "percentage": 71.67, "elapsed_time": "10:30:35", "remaining_time": "4:09:19"} +{"current_steps": 2902, "total_steps": 4048, "loss": 0.225361630320549, "lr": 4.079264611749639e-06, "epoch": 1.433955269986408, "percentage": 71.69, "elapsed_time": "10:30:48", "remaining_time": "4:09:06"} +{"current_steps": 2903, "total_steps": 4048, "loss": 0.2564583420753479, "lr": 4.07268203984697e-06, "epoch": 1.4344495242802422, "percentage": 71.71, "elapsed_time": "10:31:01", "remaining_time": "4:08:53"} +{"current_steps": 2904, "total_steps": 4048, "loss": 0.2433827817440033, "lr": 4.066103424937083e-06, "epoch": 1.4349437785740764, "percentage": 71.74, "elapsed_time": "10:31:14", "remaining_time": "4:08:40"} +{"current_steps": 2905, "total_steps": 4048, "loss": 0.26073208451271057, "lr": 4.059528771411758e-06, "epoch": 1.4354380328679106, "percentage": 71.76, "elapsed_time": "10:31:27", "remaining_time": "4:08:27"} +{"current_steps": 2906, "total_steps": 4048, "loss": 0.2937609553337097, "lr": 4.052958083660153e-06, "epoch": 1.4359322871617448, "percentage": 71.79, "elapsed_time": "10:31:40", "remaining_time": "4:08:14"} +{"current_steps": 2907, "total_steps": 4048, "loss": 0.22026552259922028, "lr": 4.046391366068756e-06, "epoch": 1.436426541455579, "percentage": 71.81, "elapsed_time": "10:31:53", "remaining_time": "4:08:01"} +{"current_steps": 2908, "total_steps": 4048, "loss": 0.21137471497058868, "lr": 4.039828623021415e-06, "epoch": 1.436920795749413, "percentage": 71.84, "elapsed_time": "10:32:07", 
"remaining_time": "4:07:48"} +{"current_steps": 2909, "total_steps": 4048, "loss": 0.23597699403762817, "lr": 4.033269858899324e-06, "epoch": 1.4374150500432472, "percentage": 71.86, "elapsed_time": "10:32:20", "remaining_time": "4:07:35"} +{"current_steps": 2910, "total_steps": 4048, "loss": 0.2667025923728943, "lr": 4.026715078081023e-06, "epoch": 1.4379093043370814, "percentage": 71.89, "elapsed_time": "10:32:33", "remaining_time": "4:07:22"} +{"current_steps": 2911, "total_steps": 4048, "loss": 0.2789616584777832, "lr": 4.020164284942387e-06, "epoch": 1.4384035586309156, "percentage": 71.91, "elapsed_time": "10:32:46", "remaining_time": "4:07:09"} +{"current_steps": 2912, "total_steps": 4048, "loss": 0.23176617920398712, "lr": 4.013617483856637e-06, "epoch": 1.4388978129247498, "percentage": 71.94, "elapsed_time": "10:33:00", "remaining_time": "4:06:56"} +{"current_steps": 2913, "total_steps": 4048, "loss": 0.2814248204231262, "lr": 4.007074679194313e-06, "epoch": 1.439392067218584, "percentage": 71.96, "elapsed_time": "10:33:12", "remaining_time": "4:06:43"} +{"current_steps": 2914, "total_steps": 4048, "loss": 0.26201730966567993, "lr": 4.000535875323307e-06, "epoch": 1.439886321512418, "percentage": 71.99, "elapsed_time": "10:33:25", "remaining_time": "4:06:30"} +{"current_steps": 2915, "total_steps": 4048, "loss": 0.22517681121826172, "lr": 3.994001076608833e-06, "epoch": 1.4403805758062522, "percentage": 72.01, "elapsed_time": "10:33:38", "remaining_time": "4:06:17"} +{"current_steps": 2916, "total_steps": 4048, "loss": 0.25220564007759094, "lr": 3.9874702874134205e-06, "epoch": 1.4408748301000864, "percentage": 72.04, "elapsed_time": "10:33:52", "remaining_time": "4:06:04"} +{"current_steps": 2917, "total_steps": 4048, "loss": 0.23441332578659058, "lr": 3.980943512096934e-06, "epoch": 1.4413690843939206, "percentage": 72.06, "elapsed_time": "10:34:04", "remaining_time": "4:05:51"} +{"current_steps": 2918, "total_steps": 4048, "loss": 0.21659764647483826, 
"lr": 3.9744207550165625e-06, "epoch": 1.4418633386877548, "percentage": 72.08, "elapsed_time": "10:34:18", "remaining_time": "4:05:38"} +{"current_steps": 2919, "total_steps": 4048, "loss": 0.21888667345046997, "lr": 3.967902020526797e-06, "epoch": 1.442357592981589, "percentage": 72.11, "elapsed_time": "10:34:31", "remaining_time": "4:05:24"} +{"current_steps": 2920, "total_steps": 4048, "loss": 0.2771157920360565, "lr": 3.961387312979454e-06, "epoch": 1.4428518472754233, "percentage": 72.13, "elapsed_time": "10:34:43", "remaining_time": "4:05:11"} +{"current_steps": 2921, "total_steps": 4048, "loss": 0.21376901865005493, "lr": 3.9548766367236605e-06, "epoch": 1.4433461015692575, "percentage": 72.16, "elapsed_time": "10:34:56", "remaining_time": "4:04:58"} +{"current_steps": 2922, "total_steps": 4048, "loss": 0.2888128161430359, "lr": 3.948369996105849e-06, "epoch": 1.4438403558630917, "percentage": 72.18, "elapsed_time": "10:35:09", "remaining_time": "4:04:45"} +{"current_steps": 2923, "total_steps": 4048, "loss": 0.27809786796569824, "lr": 3.941867395469761e-06, "epoch": 1.4443346101569257, "percentage": 72.21, "elapsed_time": "10:35:22", "remaining_time": "4:04:32"} +{"current_steps": 2924, "total_steps": 4048, "loss": 0.2573625445365906, "lr": 3.935368839156443e-06, "epoch": 1.4448288644507599, "percentage": 72.23, "elapsed_time": "10:35:35", "remaining_time": "4:04:19"} +{"current_steps": 2925, "total_steps": 4048, "loss": 0.21472841501235962, "lr": 3.928874331504232e-06, "epoch": 1.445323118744594, "percentage": 72.26, "elapsed_time": "10:35:48", "remaining_time": "4:04:06"} +{"current_steps": 2926, "total_steps": 4048, "loss": 0.23214091360569, "lr": 3.922383876848771e-06, "epoch": 1.4458173730384283, "percentage": 72.28, "elapsed_time": "10:36:01", "remaining_time": "4:03:53"} +{"current_steps": 2927, "total_steps": 4048, "loss": 0.23830139636993408, "lr": 3.915897479522995e-06, "epoch": 1.4463116273322625, "percentage": 72.31, "elapsed_time": "10:36:14", 
"remaining_time": "4:03:40"} +{"current_steps": 2928, "total_steps": 4048, "loss": 0.2519805431365967, "lr": 3.909415143857132e-06, "epoch": 1.4468058816260967, "percentage": 72.33, "elapsed_time": "10:36:27", "remaining_time": "4:03:27"} +{"current_steps": 2929, "total_steps": 4048, "loss": 0.2191445231437683, "lr": 3.9029368741786935e-06, "epoch": 1.4473001359199307, "percentage": 72.36, "elapsed_time": "10:36:41", "remaining_time": "4:03:14"} +{"current_steps": 2930, "total_steps": 4048, "loss": 0.2267228364944458, "lr": 3.896462674812482e-06, "epoch": 1.4477943902137649, "percentage": 72.38, "elapsed_time": "10:36:54", "remaining_time": "4:03:01"} +{"current_steps": 2931, "total_steps": 4048, "loss": 0.26456522941589355, "lr": 3.88999255008058e-06, "epoch": 1.448288644507599, "percentage": 72.41, "elapsed_time": "10:37:07", "remaining_time": "4:02:48"} +{"current_steps": 2932, "total_steps": 4048, "loss": 0.25602713227272034, "lr": 3.883526504302353e-06, "epoch": 1.4487828988014333, "percentage": 72.43, "elapsed_time": "10:37:20", "remaining_time": "4:02:35"} +{"current_steps": 2933, "total_steps": 4048, "loss": 0.2545332610607147, "lr": 3.877064541794435e-06, "epoch": 1.4492771530952675, "percentage": 72.46, "elapsed_time": "10:37:33", "remaining_time": "4:02:22"} +{"current_steps": 2934, "total_steps": 4048, "loss": 0.2846388816833496, "lr": 3.87060666687074e-06, "epoch": 1.4497714073891017, "percentage": 72.48, "elapsed_time": "10:37:46", "remaining_time": "4:02:09"} +{"current_steps": 2935, "total_steps": 4048, "loss": 0.2686496376991272, "lr": 3.864152883842461e-06, "epoch": 1.450265661682936, "percentage": 72.5, "elapsed_time": "10:38:00", "remaining_time": "4:01:56"} +{"current_steps": 2936, "total_steps": 4048, "loss": 0.2712322473526001, "lr": 3.857703197018044e-06, "epoch": 1.4507599159767701, "percentage": 72.53, "elapsed_time": "10:38:13", "remaining_time": "4:01:43"} +{"current_steps": 2937, "total_steps": 4048, "loss": 0.23492589592933655, "lr": 
3.851257610703209e-06, "epoch": 1.4512541702706043, "percentage": 72.55, "elapsed_time": "10:38:26", "remaining_time": "4:01:30"} +{"current_steps": 2938, "total_steps": 4048, "loss": 0.274332731962204, "lr": 3.84481612920094e-06, "epoch": 1.4517484245644385, "percentage": 72.58, "elapsed_time": "10:38:39", "remaining_time": "4:01:17"} +{"current_steps": 2939, "total_steps": 4048, "loss": 0.250995010137558, "lr": 3.838378756811475e-06, "epoch": 1.4522426788582725, "percentage": 72.6, "elapsed_time": "10:38:53", "remaining_time": "4:01:04"} +{"current_steps": 2940, "total_steps": 4048, "loss": 0.25221261382102966, "lr": 3.831945497832313e-06, "epoch": 1.4527369331521067, "percentage": 72.63, "elapsed_time": "10:39:06", "remaining_time": "4:00:51"} +{"current_steps": 2941, "total_steps": 4048, "loss": 0.2549906075000763, "lr": 3.825516356558211e-06, "epoch": 1.453231187445941, "percentage": 72.65, "elapsed_time": "10:39:19", "remaining_time": "4:00:38"} +{"current_steps": 2942, "total_steps": 4048, "loss": 0.2369248867034912, "lr": 3.819091337281158e-06, "epoch": 1.4537254417397751, "percentage": 72.68, "elapsed_time": "10:39:33", "remaining_time": "4:00:25"} +{"current_steps": 2943, "total_steps": 4048, "loss": 0.23681433498859406, "lr": 3.8126704442904182e-06, "epoch": 1.4542196960336093, "percentage": 72.7, "elapsed_time": "10:39:46", "remaining_time": "4:00:12"} +{"current_steps": 2944, "total_steps": 4048, "loss": 0.24966523051261902, "lr": 3.806253681872486e-06, "epoch": 1.4547139503274436, "percentage": 72.73, "elapsed_time": "10:39:59", "remaining_time": "3:59:59"} +{"current_steps": 2945, "total_steps": 4048, "loss": 0.28130626678466797, "lr": 3.7998410543110954e-06, "epoch": 1.4552082046212775, "percentage": 72.75, "elapsed_time": "10:40:13", "remaining_time": "3:59:46"} +{"current_steps": 2946, "total_steps": 4048, "loss": 0.2725732922554016, "lr": 3.7934325658872275e-06, "epoch": 1.4557024589151117, "percentage": 72.78, "elapsed_time": "10:40:26", 
"remaining_time": "3:59:34"} +{"current_steps": 2947, "total_steps": 4048, "loss": 0.23695361614227295, "lr": 3.7870282208790976e-06, "epoch": 1.456196713208946, "percentage": 72.8, "elapsed_time": "10:40:39", "remaining_time": "3:59:21"} +{"current_steps": 2948, "total_steps": 4048, "loss": 0.2556610405445099, "lr": 3.780628023562154e-06, "epoch": 1.4566909675027802, "percentage": 72.83, "elapsed_time": "10:40:52", "remaining_time": "3:59:08"} +{"current_steps": 2949, "total_steps": 4048, "loss": 0.26012274622917175, "lr": 3.7742319782090786e-06, "epoch": 1.4571852217966144, "percentage": 72.85, "elapsed_time": "10:41:05", "remaining_time": "3:58:54"} +{"current_steps": 2950, "total_steps": 4048, "loss": 0.23788896203041077, "lr": 3.7678400890897827e-06, "epoch": 1.4576794760904486, "percentage": 72.88, "elapsed_time": "10:41:18", "remaining_time": "3:58:41"} +{"current_steps": 2951, "total_steps": 4048, "loss": 0.2927572727203369, "lr": 3.7614523604713894e-06, "epoch": 1.4581737303842828, "percentage": 72.9, "elapsed_time": "10:41:31", "remaining_time": "3:58:28"} +{"current_steps": 2952, "total_steps": 4048, "loss": 0.2254970222711563, "lr": 3.75506879661827e-06, "epoch": 1.458667984678117, "percentage": 72.92, "elapsed_time": "10:41:45", "remaining_time": "3:58:16"} +{"current_steps": 2953, "total_steps": 4048, "loss": 0.216854065656662, "lr": 3.7486894017919883e-06, "epoch": 1.4591622389719512, "percentage": 72.95, "elapsed_time": "10:41:58", "remaining_time": "3:58:02"} +{"current_steps": 2954, "total_steps": 4048, "loss": 0.2505137026309967, "lr": 3.7423141802513417e-06, "epoch": 1.4596564932657852, "percentage": 72.97, "elapsed_time": "10:42:12", "remaining_time": "3:57:50"} +{"current_steps": 2955, "total_steps": 4048, "loss": 0.19780108332633972, "lr": 3.735943136252337e-06, "epoch": 1.4601507475596194, "percentage": 73.0, "elapsed_time": "10:42:25", "remaining_time": "3:57:37"} +{"current_steps": 2956, "total_steps": 4048, "loss": 0.26869216561317444, 
"lr": 3.7295762740481923e-06, "epoch": 1.4606450018534536, "percentage": 73.02, "elapsed_time": "10:42:38", "remaining_time": "3:57:24"} +{"current_steps": 2957, "total_steps": 4048, "loss": 0.28265517950057983, "lr": 3.7232135978893336e-06, "epoch": 1.4611392561472878, "percentage": 73.05, "elapsed_time": "10:42:51", "remaining_time": "3:57:11"} +{"current_steps": 2958, "total_steps": 4048, "loss": 0.2381918877363205, "lr": 3.7168551120233965e-06, "epoch": 1.461633510441122, "percentage": 73.07, "elapsed_time": "10:43:04", "remaining_time": "3:56:58"} +{"current_steps": 2959, "total_steps": 4048, "loss": 0.27194735407829285, "lr": 3.710500820695203e-06, "epoch": 1.4621277647349562, "percentage": 73.1, "elapsed_time": "10:43:18", "remaining_time": "3:56:45"} +{"current_steps": 2960, "total_steps": 4048, "loss": 0.2611599266529083, "lr": 3.7041507281468e-06, "epoch": 1.4626220190287902, "percentage": 73.12, "elapsed_time": "10:43:31", "remaining_time": "3:56:32"} +{"current_steps": 2961, "total_steps": 4048, "loss": 0.2970972955226898, "lr": 3.697804838617418e-06, "epoch": 1.4631162733226244, "percentage": 73.15, "elapsed_time": "10:43:44", "remaining_time": "3:56:19"} +{"current_steps": 2962, "total_steps": 4048, "loss": 0.24313557147979736, "lr": 3.6914631563434743e-06, "epoch": 1.4636105276164586, "percentage": 73.17, "elapsed_time": "10:43:57", "remaining_time": "3:56:06"} +{"current_steps": 2963, "total_steps": 4048, "loss": 0.23243792355060577, "lr": 3.685125685558587e-06, "epoch": 1.4641047819102928, "percentage": 73.2, "elapsed_time": "10:44:11", "remaining_time": "3:55:53"} +{"current_steps": 2964, "total_steps": 4048, "loss": 0.2850711941719055, "lr": 3.6787924304935696e-06, "epoch": 1.464599036204127, "percentage": 73.22, "elapsed_time": "10:44:24", "remaining_time": "3:55:40"} +{"current_steps": 2965, "total_steps": 4048, "loss": 0.26217392086982727, "lr": 3.6724633953764023e-06, "epoch": 1.4650932904979612, "percentage": 73.25, "elapsed_time": 
"10:44:37", "remaining_time": "3:55:27"} +{"current_steps": 2966, "total_steps": 4048, "loss": 0.24623268842697144, "lr": 3.666138584432264e-06, "epoch": 1.4655875447917954, "percentage": 73.27, "elapsed_time": "10:44:50", "remaining_time": "3:55:14"} +{"current_steps": 2967, "total_steps": 4048, "loss": 0.25010040402412415, "lr": 3.6598180018835063e-06, "epoch": 1.4660817990856296, "percentage": 73.3, "elapsed_time": "10:45:03", "remaining_time": "3:55:01"} +{"current_steps": 2968, "total_steps": 4048, "loss": 0.24471378326416016, "lr": 3.6535016519496603e-06, "epoch": 1.4665760533794638, "percentage": 73.32, "elapsed_time": "10:45:16", "remaining_time": "3:54:48"} +{"current_steps": 2969, "total_steps": 4048, "loss": 0.2845621109008789, "lr": 3.6471895388474323e-06, "epoch": 1.467070307673298, "percentage": 73.34, "elapsed_time": "10:45:30", "remaining_time": "3:54:35"} +{"current_steps": 2970, "total_steps": 4048, "loss": 0.26768919825553894, "lr": 3.640881666790699e-06, "epoch": 1.467564561967132, "percentage": 73.37, "elapsed_time": "10:45:43", "remaining_time": "3:54:22"} +{"current_steps": 2971, "total_steps": 4048, "loss": 0.27386170625686646, "lr": 3.6345780399904983e-06, "epoch": 1.4680588162609662, "percentage": 73.39, "elapsed_time": "10:45:56", "remaining_time": "3:54:09"} +{"current_steps": 2972, "total_steps": 4048, "loss": 0.259655237197876, "lr": 3.628278662655055e-06, "epoch": 1.4685530705548004, "percentage": 73.42, "elapsed_time": "10:46:09", "remaining_time": "3:53:56"} +{"current_steps": 2973, "total_steps": 4048, "loss": 0.2234620749950409, "lr": 3.6219835389897305e-06, "epoch": 1.4690473248486347, "percentage": 73.44, "elapsed_time": "10:46:23", "remaining_time": "3:53:43"} +{"current_steps": 2974, "total_steps": 4048, "loss": 0.25133174657821655, "lr": 3.6156926731970664e-06, "epoch": 1.4695415791424689, "percentage": 73.47, "elapsed_time": "10:46:36", "remaining_time": "3:53:30"} +{"current_steps": 2975, "total_steps": 4048, "loss": 
0.2856005132198334, "lr": 3.609406069476752e-06, "epoch": 1.4700358334363028, "percentage": 73.49, "elapsed_time": "10:46:49", "remaining_time": "3:53:17"} +{"current_steps": 2976, "total_steps": 4048, "loss": 0.23760217428207397, "lr": 3.603123732025635e-06, "epoch": 1.470530087730137, "percentage": 73.52, "elapsed_time": "10:47:02", "remaining_time": "3:53:04"} +{"current_steps": 2977, "total_steps": 4048, "loss": 0.2344968169927597, "lr": 3.596845665037715e-06, "epoch": 1.4710243420239713, "percentage": 73.54, "elapsed_time": "10:47:15", "remaining_time": "3:52:51"} +{"current_steps": 2978, "total_steps": 4048, "loss": 0.23936885595321655, "lr": 3.5905718727041415e-06, "epoch": 1.4715185963178055, "percentage": 73.57, "elapsed_time": "10:47:28", "remaining_time": "3:52:38"} +{"current_steps": 2979, "total_steps": 4048, "loss": 0.24542436003684998, "lr": 3.584302359213204e-06, "epoch": 1.4720128506116397, "percentage": 73.59, "elapsed_time": "10:47:41", "remaining_time": "3:52:25"} +{"current_steps": 2980, "total_steps": 4048, "loss": 0.24754226207733154, "lr": 3.578037128750338e-06, "epoch": 1.4725071049054739, "percentage": 73.62, "elapsed_time": "10:47:54", "remaining_time": "3:52:12"} +{"current_steps": 2981, "total_steps": 4048, "loss": 0.25167495012283325, "lr": 3.5717761854981335e-06, "epoch": 1.473001359199308, "percentage": 73.64, "elapsed_time": "10:48:07", "remaining_time": "3:51:59"} +{"current_steps": 2982, "total_steps": 4048, "loss": 0.21352116763591766, "lr": 3.565519533636296e-06, "epoch": 1.4734956134931423, "percentage": 73.67, "elapsed_time": "10:48:21", "remaining_time": "3:51:46"} +{"current_steps": 2983, "total_steps": 4048, "loss": 0.24721838533878326, "lr": 3.5592671773416798e-06, "epoch": 1.4739898677869765, "percentage": 73.69, "elapsed_time": "10:48:34", "remaining_time": "3:51:33"} +{"current_steps": 2984, "total_steps": 4048, "loss": 0.2098400741815567, "lr": 3.5530191207882705e-06, "epoch": 1.4744841220808107, "percentage": 73.72, 
"elapsed_time": "10:48:47", "remaining_time": "3:51:20"} +{"current_steps": 2985, "total_steps": 4048, "loss": 0.27138370275497437, "lr": 3.5467753681471784e-06, "epoch": 1.4749783763746447, "percentage": 73.74, "elapsed_time": "10:49:00", "remaining_time": "3:51:07"} +{"current_steps": 2986, "total_steps": 4048, "loss": 0.2675255537033081, "lr": 3.5405359235866468e-06, "epoch": 1.475472630668479, "percentage": 73.76, "elapsed_time": "10:49:13", "remaining_time": "3:50:54"} +{"current_steps": 2987, "total_steps": 4048, "loss": 0.2927984893321991, "lr": 3.5343007912720397e-06, "epoch": 1.475966884962313, "percentage": 73.79, "elapsed_time": "10:49:26", "remaining_time": "3:50:41"} +{"current_steps": 2988, "total_steps": 4048, "loss": 0.2897256910800934, "lr": 3.5280699753658354e-06, "epoch": 1.4764611392561473, "percentage": 73.81, "elapsed_time": "10:49:39", "remaining_time": "3:50:28"} +{"current_steps": 2989, "total_steps": 4048, "loss": 0.25903570652008057, "lr": 3.521843480027646e-06, "epoch": 1.4769553935499815, "percentage": 73.84, "elapsed_time": "10:49:53", "remaining_time": "3:50:15"} +{"current_steps": 2990, "total_steps": 4048, "loss": 0.2097684144973755, "lr": 3.515621309414191e-06, "epoch": 1.4774496478438157, "percentage": 73.86, "elapsed_time": "10:50:06", "remaining_time": "3:50:02"} +{"current_steps": 2991, "total_steps": 4048, "loss": 0.25807827711105347, "lr": 3.5094034676792952e-06, "epoch": 1.4779439021376497, "percentage": 73.89, "elapsed_time": "10:50:20", "remaining_time": "3:49:49"} +{"current_steps": 2992, "total_steps": 4048, "loss": 0.24161803722381592, "lr": 3.503189958973906e-06, "epoch": 1.478438156431484, "percentage": 73.91, "elapsed_time": "10:50:33", "remaining_time": "3:49:36"} +{"current_steps": 2993, "total_steps": 4048, "loss": 0.2612338364124298, "lr": 3.4969807874460717e-06, "epoch": 1.4789324107253181, "percentage": 73.94, "elapsed_time": "10:50:46", "remaining_time": "3:49:23"} +{"current_steps": 2994, "total_steps": 4048, 
"loss": 0.2529192566871643, "lr": 3.490775957240947e-06, "epoch": 1.4794266650191523, "percentage": 73.96, "elapsed_time": "10:50:59", "remaining_time": "3:49:10"} +{"current_steps": 2995, "total_steps": 4048, "loss": 0.2616920471191406, "lr": 3.4845754725007883e-06, "epoch": 1.4799209193129865, "percentage": 73.99, "elapsed_time": "10:51:13", "remaining_time": "3:48:57"} +{"current_steps": 2996, "total_steps": 4048, "loss": 0.2372770607471466, "lr": 3.4783793373649534e-06, "epoch": 1.4804151736068207, "percentage": 74.01, "elapsed_time": "10:51:26", "remaining_time": "3:48:44"} +{"current_steps": 2997, "total_steps": 4048, "loss": 0.2993369996547699, "lr": 3.4721875559698826e-06, "epoch": 1.480909427900655, "percentage": 74.04, "elapsed_time": "10:51:39", "remaining_time": "3:48:31"} +{"current_steps": 2998, "total_steps": 4048, "loss": 0.2703147530555725, "lr": 3.4660001324491354e-06, "epoch": 1.4814036821944891, "percentage": 74.06, "elapsed_time": "10:51:52", "remaining_time": "3:48:18"} +{"current_steps": 2999, "total_steps": 4048, "loss": 0.2909662425518036, "lr": 3.459817070933337e-06, "epoch": 1.4818979364883234, "percentage": 74.09, "elapsed_time": "10:52:06", "remaining_time": "3:48:05"} +{"current_steps": 3000, "total_steps": 4048, "loss": 0.2620519697666168, "lr": 3.4536383755502146e-06, "epoch": 1.4823921907821573, "percentage": 74.11, "elapsed_time": "10:52:19", "remaining_time": "3:47:52"} +{"current_steps": 3001, "total_steps": 4048, "loss": 0.2740327715873718, "lr": 3.447464050424576e-06, "epoch": 1.4828864450759915, "percentage": 74.14, "elapsed_time": "10:52:38", "remaining_time": "3:47:41"} +{"current_steps": 3002, "total_steps": 4048, "loss": 0.2597920000553131, "lr": 3.441294099678314e-06, "epoch": 1.4833806993698258, "percentage": 74.16, "elapsed_time": "10:52:52", "remaining_time": "3:47:29"} +{"current_steps": 3003, "total_steps": 4048, "loss": 0.23138844966888428, "lr": 3.435128527430397e-06, "epoch": 1.48387495366366, "percentage": 74.18, 
"elapsed_time": "10:53:05", "remaining_time": "3:47:15"} +{"current_steps": 3004, "total_steps": 4048, "loss": 0.26457998156547546, "lr": 3.428967337796879e-06, "epoch": 1.4843692079574942, "percentage": 74.21, "elapsed_time": "10:53:18", "remaining_time": "3:47:03"} +{"current_steps": 3005, "total_steps": 4048, "loss": 0.22283414006233215, "lr": 3.4228105348908703e-06, "epoch": 1.4848634622513284, "percentage": 74.23, "elapsed_time": "10:53:31", "remaining_time": "3:46:49"} +{"current_steps": 3006, "total_steps": 4048, "loss": 0.26169392466545105, "lr": 3.416658122822576e-06, "epoch": 1.4853577165451624, "percentage": 74.26, "elapsed_time": "10:53:44", "remaining_time": "3:46:36"} +{"current_steps": 3007, "total_steps": 4048, "loss": 0.22738765180110931, "lr": 3.4105101056992574e-06, "epoch": 1.4858519708389966, "percentage": 74.28, "elapsed_time": "10:53:57", "remaining_time": "3:46:23"} +{"current_steps": 3008, "total_steps": 4048, "loss": 0.24252702295780182, "lr": 3.404366487625237e-06, "epoch": 1.4863462251328308, "percentage": 74.31, "elapsed_time": "10:54:11", "remaining_time": "3:46:10"} +{"current_steps": 3009, "total_steps": 4048, "loss": 0.2192659229040146, "lr": 3.398227272701905e-06, "epoch": 1.486840479426665, "percentage": 74.33, "elapsed_time": "10:54:24", "remaining_time": "3:45:57"} +{"current_steps": 3010, "total_steps": 4048, "loss": 0.23824100196361542, "lr": 3.3920924650277253e-06, "epoch": 1.4873347337204992, "percentage": 74.36, "elapsed_time": "10:54:37", "remaining_time": "3:45:44"} +{"current_steps": 3011, "total_steps": 4048, "loss": 0.25558948516845703, "lr": 3.3859620686981977e-06, "epoch": 1.4878289880143334, "percentage": 74.38, "elapsed_time": "10:54:51", "remaining_time": "3:45:32"} +{"current_steps": 3012, "total_steps": 4048, "loss": 0.23521414399147034, "lr": 3.3798360878058887e-06, "epoch": 1.4883232423081676, "percentage": 74.41, "elapsed_time": "10:55:04", "remaining_time": "3:45:19"} +{"current_steps": 3013, "total_steps": 
4048, "loss": 0.26024043560028076, "lr": 3.373714526440417e-06, "epoch": 1.4888174966020018, "percentage": 74.43, "elapsed_time": "10:55:17", "remaining_time": "3:45:06"} +{"current_steps": 3014, "total_steps": 4048, "loss": 0.2676945626735687, "lr": 3.3675973886884506e-06, "epoch": 1.489311750895836, "percentage": 74.46, "elapsed_time": "10:55:30", "remaining_time": "3:44:53"} +{"current_steps": 3015, "total_steps": 4048, "loss": 0.29499778151512146, "lr": 3.361484678633701e-06, "epoch": 1.4898060051896702, "percentage": 74.48, "elapsed_time": "10:55:44", "remaining_time": "3:44:40"} +{"current_steps": 3016, "total_steps": 4048, "loss": 0.21667227149009705, "lr": 3.35537640035693e-06, "epoch": 1.4903002594835042, "percentage": 74.51, "elapsed_time": "10:55:57", "remaining_time": "3:44:27"} +{"current_steps": 3017, "total_steps": 4048, "loss": 0.2852727770805359, "lr": 3.3492725579359288e-06, "epoch": 1.4907945137773384, "percentage": 74.53, "elapsed_time": "10:56:10", "remaining_time": "3:44:14"} +{"current_steps": 3018, "total_steps": 4048, "loss": 0.22535362839698792, "lr": 3.343173155445546e-06, "epoch": 1.4912887680711726, "percentage": 74.56, "elapsed_time": "10:56:23", "remaining_time": "3:44:01"} +{"current_steps": 3019, "total_steps": 4048, "loss": 0.23513402044773102, "lr": 3.3370781969576473e-06, "epoch": 1.4917830223650068, "percentage": 74.58, "elapsed_time": "10:56:37", "remaining_time": "3:43:48"} +{"current_steps": 3020, "total_steps": 4048, "loss": 0.2343328893184662, "lr": 3.3309876865411426e-06, "epoch": 1.492277276658841, "percentage": 74.6, "elapsed_time": "10:56:50", "remaining_time": "3:43:35"} +{"current_steps": 3021, "total_steps": 4048, "loss": 0.309964656829834, "lr": 3.3249016282619696e-06, "epoch": 1.4927715309526752, "percentage": 74.63, "elapsed_time": "10:57:03", "remaining_time": "3:43:22"} +{"current_steps": 3022, "total_steps": 4048, "loss": 0.2678214907646179, "lr": 3.318820026183095e-06, "epoch": 1.4932657852465092, 
"percentage": 74.65, "elapsed_time": "10:57:16", "remaining_time": "3:43:09"} +{"current_steps": 3023, "total_steps": 4048, "loss": 0.24117907881736755, "lr": 3.312742884364508e-06, "epoch": 1.4937600395403434, "percentage": 74.68, "elapsed_time": "10:57:29", "remaining_time": "3:42:56"} +{"current_steps": 3024, "total_steps": 4048, "loss": 0.23572009801864624, "lr": 3.306670206863225e-06, "epoch": 1.4942542938341776, "percentage": 74.7, "elapsed_time": "10:57:43", "remaining_time": "3:42:43"} +{"current_steps": 3025, "total_steps": 4048, "loss": 0.20058652758598328, "lr": 3.3006019977332728e-06, "epoch": 1.4947485481280118, "percentage": 74.73, "elapsed_time": "10:57:56", "remaining_time": "3:42:30"} +{"current_steps": 3026, "total_steps": 4048, "loss": 0.2433123141527176, "lr": 3.2945382610257017e-06, "epoch": 1.495242802421846, "percentage": 74.75, "elapsed_time": "10:58:10", "remaining_time": "3:42:17"} +{"current_steps": 3027, "total_steps": 4048, "loss": 0.2648032009601593, "lr": 3.2884790007885834e-06, "epoch": 1.4957370567156802, "percentage": 74.78, "elapsed_time": "10:58:23", "remaining_time": "3:42:04"} +{"current_steps": 3028, "total_steps": 4048, "loss": 0.23508986830711365, "lr": 3.2824242210669853e-06, "epoch": 1.4962313110095145, "percentage": 74.8, "elapsed_time": "10:58:37", "remaining_time": "3:41:51"} +{"current_steps": 3029, "total_steps": 4048, "loss": 0.2340327799320221, "lr": 3.2763739259029946e-06, "epoch": 1.4967255653033487, "percentage": 74.83, "elapsed_time": "10:58:50", "remaining_time": "3:41:38"} +{"current_steps": 3030, "total_steps": 4048, "loss": 0.24071671068668365, "lr": 3.2703281193357028e-06, "epoch": 1.4972198195971829, "percentage": 74.85, "elapsed_time": "10:59:03", "remaining_time": "3:41:25"} +{"current_steps": 3031, "total_steps": 4048, "loss": 0.26332271099090576, "lr": 3.264286805401203e-06, "epoch": 1.4977140738910169, "percentage": 74.88, "elapsed_time": "10:59:17", "remaining_time": "3:41:12"} +{"current_steps": 
3032, "total_steps": 4048, "loss": 0.21818014979362488, "lr": 3.2582499881325904e-06, "epoch": 1.498208328184851, "percentage": 74.9, "elapsed_time": "10:59:30", "remaining_time": "3:40:59"} +{"current_steps": 3033, "total_steps": 4048, "loss": 0.26997917890548706, "lr": 3.2522176715599606e-06, "epoch": 1.4987025824786853, "percentage": 74.93, "elapsed_time": "10:59:43", "remaining_time": "3:40:46"} +{"current_steps": 3034, "total_steps": 4048, "loss": 0.21703608334064484, "lr": 3.2461898597103935e-06, "epoch": 1.4991968367725195, "percentage": 74.95, "elapsed_time": "10:59:56", "remaining_time": "3:40:33"} +{"current_steps": 3035, "total_steps": 4048, "loss": 0.24345526099205017, "lr": 3.240166556607979e-06, "epoch": 1.4996910910663537, "percentage": 74.98, "elapsed_time": "11:00:10", "remaining_time": "3:40:20"} +{"current_steps": 3036, "total_steps": 4048, "loss": 0.2428402602672577, "lr": 3.2341477662737877e-06, "epoch": 1.5001853453601877, "percentage": 75.0, "elapsed_time": "11:00:23", "remaining_time": "3:40:07"} +{"current_steps": 3037, "total_steps": 4048, "loss": 0.234619602560997, "lr": 3.228133492725872e-06, "epoch": 1.5006795996540219, "percentage": 75.02, "elapsed_time": "11:00:36", "remaining_time": "3:39:54"} +{"current_steps": 3038, "total_steps": 4048, "loss": 0.27995944023132324, "lr": 3.2221237399792784e-06, "epoch": 1.501173853947856, "percentage": 75.05, "elapsed_time": "11:00:49", "remaining_time": "3:39:41"} +{"current_steps": 3039, "total_steps": 4048, "loss": 0.23708665370941162, "lr": 3.2161185120460327e-06, "epoch": 1.5016681082416903, "percentage": 75.07, "elapsed_time": "11:01:03", "remaining_time": "3:39:28"} +{"current_steps": 3040, "total_steps": 4048, "loss": 0.2541486620903015, "lr": 3.2101178129351373e-06, "epoch": 1.5021623625355245, "percentage": 75.1, "elapsed_time": "11:01:16", "remaining_time": "3:39:15"} +{"current_steps": 3041, "total_steps": 4048, "loss": 0.2281494140625, "lr": 3.204121646652576e-06, "epoch": 
1.5026566168293587, "percentage": 75.12, "elapsed_time": "11:01:29", "remaining_time": "3:39:02"} +{"current_steps": 3042, "total_steps": 4048, "loss": 0.24793995916843414, "lr": 3.1981300172013006e-06, "epoch": 1.503150871123193, "percentage": 75.15, "elapsed_time": "11:01:42", "remaining_time": "3:38:49"} +{"current_steps": 3043, "total_steps": 4048, "loss": 0.25877612829208374, "lr": 3.19214292858124e-06, "epoch": 1.503645125417027, "percentage": 75.17, "elapsed_time": "11:01:56", "remaining_time": "3:38:36"} +{"current_steps": 3044, "total_steps": 4048, "loss": 0.23822908103466034, "lr": 3.1861603847892907e-06, "epoch": 1.5041393797108613, "percentage": 75.2, "elapsed_time": "11:02:09", "remaining_time": "3:38:23"} +{"current_steps": 3045, "total_steps": 4048, "loss": 0.2450297623872757, "lr": 3.1801823898193075e-06, "epoch": 1.5046336340046955, "percentage": 75.22, "elapsed_time": "11:02:22", "remaining_time": "3:38:10"} +{"current_steps": 3046, "total_steps": 4048, "loss": 0.23657044768333435, "lr": 3.1742089476621176e-06, "epoch": 1.5051278882985297, "percentage": 75.25, "elapsed_time": "11:02:35", "remaining_time": "3:37:57"} +{"current_steps": 3047, "total_steps": 4048, "loss": 0.22040539979934692, "lr": 3.1682400623055043e-06, "epoch": 1.505622142592364, "percentage": 75.27, "elapsed_time": "11:02:49", "remaining_time": "3:37:44"} +{"current_steps": 3048, "total_steps": 4048, "loss": 0.24671347439289093, "lr": 3.162275737734213e-06, "epoch": 1.506116396886198, "percentage": 75.3, "elapsed_time": "11:03:01", "remaining_time": "3:37:31"} +{"current_steps": 3049, "total_steps": 4048, "loss": 0.2590971291065216, "lr": 3.156315977929939e-06, "epoch": 1.5066106511800321, "percentage": 75.32, "elapsed_time": "11:03:15", "remaining_time": "3:37:18"} +{"current_steps": 3050, "total_steps": 4048, "loss": 0.2650923430919647, "lr": 3.1503607868713383e-06, "epoch": 1.5071049054738663, "percentage": 75.35, "elapsed_time": "11:03:28", "remaining_time": "3:37:05"} 
+{"current_steps": 3051, "total_steps": 4048, "loss": 0.22146420180797577, "lr": 3.1444101685339987e-06, "epoch": 1.5075991597677005, "percentage": 75.37, "elapsed_time": "11:03:41", "remaining_time": "3:36:52"} +{"current_steps": 3052, "total_steps": 4048, "loss": 0.26743125915527344, "lr": 3.1384641268904804e-06, "epoch": 1.5080934140615345, "percentage": 75.4, "elapsed_time": "11:03:55", "remaining_time": "3:36:39"} +{"current_steps": 3053, "total_steps": 4048, "loss": 0.24730908870697021, "lr": 3.1325226659102746e-06, "epoch": 1.5085876683553687, "percentage": 75.42, "elapsed_time": "11:04:07", "remaining_time": "3:36:26"} +{"current_steps": 3054, "total_steps": 4048, "loss": 0.26301079988479614, "lr": 3.1265857895598094e-06, "epoch": 1.509081922649203, "percentage": 75.44, "elapsed_time": "11:04:21", "remaining_time": "3:36:13"} +{"current_steps": 3055, "total_steps": 4048, "loss": 0.22815877199172974, "lr": 3.1206535018024598e-06, "epoch": 1.5095761769430371, "percentage": 75.47, "elapsed_time": "11:04:34", "remaining_time": "3:36:00"} +{"current_steps": 3056, "total_steps": 4048, "loss": 0.25178754329681396, "lr": 3.114725806598544e-06, "epoch": 1.5100704312368713, "percentage": 75.49, "elapsed_time": "11:04:47", "remaining_time": "3:35:47"} +{"current_steps": 3057, "total_steps": 4048, "loss": 0.20269548892974854, "lr": 3.1088027079052973e-06, "epoch": 1.5105646855307056, "percentage": 75.52, "elapsed_time": "11:05:00", "remaining_time": "3:35:34"} +{"current_steps": 3058, "total_steps": 4048, "loss": 0.25972461700439453, "lr": 3.1028842096769006e-06, "epoch": 1.5110589398245398, "percentage": 75.54, "elapsed_time": "11:05:14", "remaining_time": "3:35:21"} +{"current_steps": 3059, "total_steps": 4048, "loss": 0.23313641548156738, "lr": 3.0969703158644583e-06, "epoch": 1.511553194118374, "percentage": 75.57, "elapsed_time": "11:05:27", "remaining_time": "3:35:08"} +{"current_steps": 3060, "total_steps": 4048, "loss": 0.2359476238489151, "lr": 
3.0910610304159993e-06, "epoch": 1.5120474484122082, "percentage": 75.59, "elapsed_time": "11:05:41", "remaining_time": "3:34:56"} +{"current_steps": 3061, "total_steps": 4048, "loss": 0.263039767742157, "lr": 3.085156357276481e-06, "epoch": 1.5125417027060424, "percentage": 75.62, "elapsed_time": "11:05:54", "remaining_time": "3:34:42"} +{"current_steps": 3062, "total_steps": 4048, "loss": 0.2222701609134674, "lr": 3.0792563003877795e-06, "epoch": 1.5130359569998766, "percentage": 75.64, "elapsed_time": "11:06:07", "remaining_time": "3:34:30"} +{"current_steps": 3063, "total_steps": 4048, "loss": 0.2511240839958191, "lr": 3.0733608636886815e-06, "epoch": 1.5135302112937106, "percentage": 75.67, "elapsed_time": "11:06:20", "remaining_time": "3:34:17"} +{"current_steps": 3064, "total_steps": 4048, "loss": 0.26376873254776, "lr": 3.0674700511149057e-06, "epoch": 1.5140244655875448, "percentage": 75.69, "elapsed_time": "11:06:34", "remaining_time": "3:34:04"} +{"current_steps": 3065, "total_steps": 4048, "loss": 0.2883176803588867, "lr": 3.0615838665990685e-06, "epoch": 1.514518719881379, "percentage": 75.72, "elapsed_time": "11:06:46", "remaining_time": "3:33:50"} +{"current_steps": 3066, "total_steps": 4048, "loss": 0.2641439437866211, "lr": 3.055702314070703e-06, "epoch": 1.5150129741752132, "percentage": 75.74, "elapsed_time": "11:06:59", "remaining_time": "3:33:37"} +{"current_steps": 3067, "total_steps": 4048, "loss": 0.22250229120254517, "lr": 3.049825397456252e-06, "epoch": 1.5155072284690472, "percentage": 75.77, "elapsed_time": "11:07:13", "remaining_time": "3:33:24"} +{"current_steps": 3068, "total_steps": 4048, "loss": 0.291684091091156, "lr": 3.0439531206790585e-06, "epoch": 1.5160014827628814, "percentage": 75.79, "elapsed_time": "11:07:26", "remaining_time": "3:33:11"} +{"current_steps": 3069, "total_steps": 4048, "loss": 0.22581104934215546, "lr": 3.0380854876593725e-06, "epoch": 1.5164957370567156, "percentage": 75.82, "elapsed_time": "11:07:39", 
"remaining_time": "3:32:58"} +{"current_steps": 3070, "total_steps": 4048, "loss": 0.22701920568943024, "lr": 3.032222502314345e-06, "epoch": 1.5169899913505498, "percentage": 75.84, "elapsed_time": "11:07:52", "remaining_time": "3:32:45"} +{"current_steps": 3071, "total_steps": 4048, "loss": 0.27151840925216675, "lr": 3.0263641685580134e-06, "epoch": 1.517484245644384, "percentage": 75.86, "elapsed_time": "11:08:06", "remaining_time": "3:32:32"} +{"current_steps": 3072, "total_steps": 4048, "loss": 0.25780510902404785, "lr": 3.0205104903013183e-06, "epoch": 1.5179784999382182, "percentage": 75.89, "elapsed_time": "11:08:19", "remaining_time": "3:32:19"} +{"current_steps": 3073, "total_steps": 4048, "loss": 0.23905009031295776, "lr": 3.014661471452103e-06, "epoch": 1.5184727542320524, "percentage": 75.91, "elapsed_time": "11:08:32", "remaining_time": "3:32:06"} +{"current_steps": 3074, "total_steps": 4048, "loss": 0.25984710454940796, "lr": 3.0088171159150758e-06, "epoch": 1.5189670085258866, "percentage": 75.94, "elapsed_time": "11:08:45", "remaining_time": "3:31:53"} +{"current_steps": 3075, "total_steps": 4048, "loss": 0.24934321641921997, "lr": 3.0029774275918523e-06, "epoch": 1.5194612628197208, "percentage": 75.96, "elapsed_time": "11:08:59", "remaining_time": "3:31:40"} +{"current_steps": 3076, "total_steps": 4048, "loss": 0.24181538820266724, "lr": 2.997142410380921e-06, "epoch": 1.519955517113555, "percentage": 75.99, "elapsed_time": "11:09:12", "remaining_time": "3:31:27"} +{"current_steps": 3077, "total_steps": 4048, "loss": 0.28867265582084656, "lr": 2.9913120681776586e-06, "epoch": 1.5204497714073892, "percentage": 76.01, "elapsed_time": "11:09:25", "remaining_time": "3:31:14"} +{"current_steps": 3078, "total_steps": 4048, "loss": 0.25082239508628845, "lr": 2.9854864048743183e-06, "epoch": 1.5209440257012234, "percentage": 76.04, "elapsed_time": "11:09:38", "remaining_time": "3:31:01"} +{"current_steps": 3079, "total_steps": 4048, "loss": 
0.21152186393737793, "lr": 2.979665424360031e-06, "epoch": 1.5214382799950574, "percentage": 76.06, "elapsed_time": "11:09:51", "remaining_time": "3:30:48"} +{"current_steps": 3080, "total_steps": 4048, "loss": 0.22989922761917114, "lr": 2.9738491305207926e-06, "epoch": 1.5219325342888916, "percentage": 76.09, "elapsed_time": "11:10:04", "remaining_time": "3:30:35"} +{"current_steps": 3081, "total_steps": 4048, "loss": 0.21606113016605377, "lr": 2.9680375272394855e-06, "epoch": 1.5224267885827258, "percentage": 76.11, "elapsed_time": "11:10:17", "remaining_time": "3:30:22"} +{"current_steps": 3082, "total_steps": 4048, "loss": 0.25060969591140747, "lr": 2.962230618395855e-06, "epoch": 1.5229210428765598, "percentage": 76.14, "elapsed_time": "11:10:31", "remaining_time": "3:30:09"} +{"current_steps": 3083, "total_steps": 4048, "loss": 0.2574993371963501, "lr": 2.9564284078665016e-06, "epoch": 1.523415297170394, "percentage": 76.16, "elapsed_time": "11:10:43", "remaining_time": "3:29:56"} +{"current_steps": 3084, "total_steps": 4048, "loss": 0.2552590072154999, "lr": 2.9506308995249035e-06, "epoch": 1.5239095514642282, "percentage": 76.19, "elapsed_time": "11:10:57", "remaining_time": "3:29:43"} +{"current_steps": 3085, "total_steps": 4048, "loss": 0.2356393188238144, "lr": 2.9448380972413936e-06, "epoch": 1.5244038057580624, "percentage": 76.21, "elapsed_time": "11:11:10", "remaining_time": "3:29:30"} +{"current_steps": 3086, "total_steps": 4048, "loss": 0.25111299753189087, "lr": 2.939050004883164e-06, "epoch": 1.5248980600518967, "percentage": 76.24, "elapsed_time": "11:11:24", "remaining_time": "3:29:17"} +{"current_steps": 3087, "total_steps": 4048, "loss": 0.2713226079940796, "lr": 2.933266626314263e-06, "epoch": 1.5253923143457309, "percentage": 76.26, "elapsed_time": "11:11:37", "remaining_time": "3:29:04"} +{"current_steps": 3088, "total_steps": 4048, "loss": 0.2493591606616974, "lr": 2.92748796539559e-06, "epoch": 1.525886568639565, "percentage": 76.28, 
"elapsed_time": "11:11:50", "remaining_time": "3:28:51"} +{"current_steps": 3089, "total_steps": 4048, "loss": 0.2377934455871582, "lr": 2.9217140259848984e-06, "epoch": 1.5263808229333993, "percentage": 76.31, "elapsed_time": "11:12:03", "remaining_time": "3:28:38"} +{"current_steps": 3090, "total_steps": 4048, "loss": 0.23113523423671722, "lr": 2.9159448119367896e-06, "epoch": 1.5268750772272335, "percentage": 76.33, "elapsed_time": "11:12:16", "remaining_time": "3:28:25"} +{"current_steps": 3091, "total_steps": 4048, "loss": 0.2212657630443573, "lr": 2.910180327102702e-06, "epoch": 1.5273693315210677, "percentage": 76.36, "elapsed_time": "11:12:30", "remaining_time": "3:28:12"} +{"current_steps": 3092, "total_steps": 4048, "loss": 0.3317147195339203, "lr": 2.904420575330923e-06, "epoch": 1.527863585814902, "percentage": 76.38, "elapsed_time": "11:12:43", "remaining_time": "3:27:59"} +{"current_steps": 3093, "total_steps": 4048, "loss": 0.21677865087985992, "lr": 2.8986655604665914e-06, "epoch": 1.528357840108736, "percentage": 76.41, "elapsed_time": "11:12:57", "remaining_time": "3:27:46"} +{"current_steps": 3094, "total_steps": 4048, "loss": 0.2719038724899292, "lr": 2.892915286351663e-06, "epoch": 1.52885209440257, "percentage": 76.43, "elapsed_time": "11:13:10", "remaining_time": "3:27:33"} +{"current_steps": 3095, "total_steps": 4048, "loss": 0.2870655953884125, "lr": 2.887169756824941e-06, "epoch": 1.5293463486964043, "percentage": 76.46, "elapsed_time": "11:13:23", "remaining_time": "3:27:20"} +{"current_steps": 3096, "total_steps": 4048, "loss": 0.27370864152908325, "lr": 2.8814289757220636e-06, "epoch": 1.5298406029902385, "percentage": 76.48, "elapsed_time": "11:13:36", "remaining_time": "3:27:07"} +{"current_steps": 3097, "total_steps": 4048, "loss": 0.24579623341560364, "lr": 2.8756929468754834e-06, "epoch": 1.5303348572840727, "percentage": 76.51, "elapsed_time": "11:13:49", "remaining_time": "3:26:54"} +{"current_steps": 3098, "total_steps": 4048, 
"loss": 0.25092196464538574, "lr": 2.869961674114501e-06, "epoch": 1.5308291115779067, "percentage": 76.53, "elapsed_time": "11:14:02", "remaining_time": "3:26:41"} +{"current_steps": 3099, "total_steps": 4048, "loss": 0.29637211561203003, "lr": 2.864235161265232e-06, "epoch": 1.531323365871741, "percentage": 76.56, "elapsed_time": "11:14:16", "remaining_time": "3:26:28"} +{"current_steps": 3100, "total_steps": 4048, "loss": 0.24216854572296143, "lr": 2.8585134121506086e-06, "epoch": 1.531817620165575, "percentage": 76.58, "elapsed_time": "11:14:29", "remaining_time": "3:26:15"} +{"current_steps": 3101, "total_steps": 4048, "loss": 0.2050018608570099, "lr": 2.8527964305903887e-06, "epoch": 1.5323118744594093, "percentage": 76.61, "elapsed_time": "11:14:48", "remaining_time": "3:26:04"} +{"current_steps": 3102, "total_steps": 4048, "loss": 0.2323600798845291, "lr": 2.8470842204011562e-06, "epoch": 1.5328061287532435, "percentage": 76.63, "elapsed_time": "11:15:01", "remaining_time": "3:25:51"} +{"current_steps": 3103, "total_steps": 4048, "loss": 0.2582741379737854, "lr": 2.8413767853962937e-06, "epoch": 1.5333003830470777, "percentage": 76.66, "elapsed_time": "11:15:14", "remaining_time": "3:25:38"} +{"current_steps": 3104, "total_steps": 4048, "loss": 0.2190069705247879, "lr": 2.8356741293860034e-06, "epoch": 1.533794637340912, "percentage": 76.68, "elapsed_time": "11:15:26", "remaining_time": "3:25:25"} +{"current_steps": 3105, "total_steps": 4048, "loss": 0.2293972671031952, "lr": 2.8299762561773004e-06, "epoch": 1.5342888916347461, "percentage": 76.7, "elapsed_time": "11:15:40", "remaining_time": "3:25:12"} +{"current_steps": 3106, "total_steps": 4048, "loss": 0.28793102502822876, "lr": 2.8242831695740004e-06, "epoch": 1.5347831459285803, "percentage": 76.73, "elapsed_time": "11:15:52", "remaining_time": "3:24:59"} +{"current_steps": 3107, "total_steps": 4048, "loss": 0.25700464844703674, "lr": 2.8185948733767276e-06, "epoch": 1.5352774002224145, "percentage": 
76.75, "elapsed_time": "11:16:06", "remaining_time": "3:24:46"} +{"current_steps": 3108, "total_steps": 4048, "loss": 0.2633448541164398, "lr": 2.8129113713829115e-06, "epoch": 1.5357716545162488, "percentage": 76.78, "elapsed_time": "11:16:19", "remaining_time": "3:24:32"} +{"current_steps": 3109, "total_steps": 4048, "loss": 0.2363145351409912, "lr": 2.8072326673867667e-06, "epoch": 1.5362659088100827, "percentage": 76.8, "elapsed_time": "11:16:32", "remaining_time": "3:24:19"} +{"current_steps": 3110, "total_steps": 4048, "loss": 0.24324053525924683, "lr": 2.8015587651793273e-06, "epoch": 1.536760163103917, "percentage": 76.83, "elapsed_time": "11:16:45", "remaining_time": "3:24:06"} +{"current_steps": 3111, "total_steps": 4048, "loss": 0.24139198660850525, "lr": 2.795889668548399e-06, "epoch": 1.5372544173977511, "percentage": 76.85, "elapsed_time": "11:16:57", "remaining_time": "3:23:53"} +{"current_steps": 3112, "total_steps": 4048, "loss": 0.2502334713935852, "lr": 2.790225381278595e-06, "epoch": 1.5377486716915854, "percentage": 76.88, "elapsed_time": "11:17:11", "remaining_time": "3:23:40"} +{"current_steps": 3113, "total_steps": 4048, "loss": 0.24635109305381775, "lr": 2.784565907151311e-06, "epoch": 1.5382429259854193, "percentage": 76.9, "elapsed_time": "11:17:24", "remaining_time": "3:23:27"} +{"current_steps": 3114, "total_steps": 4048, "loss": 0.2299586534500122, "lr": 2.7789112499447312e-06, "epoch": 1.5387371802792535, "percentage": 76.93, "elapsed_time": "11:17:37", "remaining_time": "3:23:14"} +{"current_steps": 3115, "total_steps": 4048, "loss": 0.2296627312898636, "lr": 2.7732614134338243e-06, "epoch": 1.5392314345730878, "percentage": 76.95, "elapsed_time": "11:17:50", "remaining_time": "3:23:01"} +{"current_steps": 3116, "total_steps": 4048, "loss": 0.26127320528030396, "lr": 2.767616401390343e-06, "epoch": 1.539725688866922, "percentage": 76.98, "elapsed_time": "11:18:03", "remaining_time": "3:22:48"} +{"current_steps": 3117, "total_steps": 
4048, "loss": 0.24718445539474487, "lr": 2.761976217582808e-06, "epoch": 1.5402199431607562, "percentage": 77.0, "elapsed_time": "11:18:16", "remaining_time": "3:22:35"} +{"current_steps": 3118, "total_steps": 4048, "loss": 0.22314362227916718, "lr": 2.7563408657765345e-06, "epoch": 1.5407141974545904, "percentage": 77.03, "elapsed_time": "11:18:30", "remaining_time": "3:22:22"} +{"current_steps": 3119, "total_steps": 4048, "loss": 0.2288416028022766, "lr": 2.750710349733602e-06, "epoch": 1.5412084517484246, "percentage": 77.05, "elapsed_time": "11:18:42", "remaining_time": "3:22:09"} +{"current_steps": 3120, "total_steps": 4048, "loss": 0.26181158423423767, "lr": 2.7450846732128577e-06, "epoch": 1.5417027060422588, "percentage": 77.08, "elapsed_time": "11:18:56", "remaining_time": "3:21:56"} +{"current_steps": 3121, "total_steps": 4048, "loss": 0.22397834062576294, "lr": 2.739463839969926e-06, "epoch": 1.542196960336093, "percentage": 77.1, "elapsed_time": "11:19:09", "remaining_time": "3:21:43"} +{"current_steps": 3122, "total_steps": 4048, "loss": 0.23633858561515808, "lr": 2.7338478537571943e-06, "epoch": 1.5426912146299272, "percentage": 77.12, "elapsed_time": "11:19:22", "remaining_time": "3:21:30"} +{"current_steps": 3123, "total_steps": 4048, "loss": 0.26719149947166443, "lr": 2.7282367183238143e-06, "epoch": 1.5431854689237614, "percentage": 77.15, "elapsed_time": "11:19:35", "remaining_time": "3:21:17"} +{"current_steps": 3124, "total_steps": 4048, "loss": 0.2882165014743805, "lr": 2.722630437415701e-06, "epoch": 1.5436797232175956, "percentage": 77.17, "elapsed_time": "11:19:48", "remaining_time": "3:21:04"} +{"current_steps": 3125, "total_steps": 4048, "loss": 0.2377905696630478, "lr": 2.7170290147755285e-06, "epoch": 1.5441739775114296, "percentage": 77.2, "elapsed_time": "11:20:01", "remaining_time": "3:20:51"} +{"current_steps": 3126, "total_steps": 4048, "loss": 0.2705368399620056, "lr": 2.7114324541427193e-06, "epoch": 1.5446682318052638, 
"percentage": 77.22, "elapsed_time": "11:20:14", "remaining_time": "3:20:38"} +{"current_steps": 3127, "total_steps": 4048, "loss": 0.246593177318573, "lr": 2.7058407592534663e-06, "epoch": 1.545162486099098, "percentage": 77.25, "elapsed_time": "11:20:27", "remaining_time": "3:20:24"} +{"current_steps": 3128, "total_steps": 4048, "loss": 0.2339816391468048, "lr": 2.700253933840705e-06, "epoch": 1.5456567403929322, "percentage": 77.27, "elapsed_time": "11:20:39", "remaining_time": "3:20:11"} +{"current_steps": 3129, "total_steps": 4048, "loss": 0.2727898359298706, "lr": 2.6946719816341127e-06, "epoch": 1.5461509946867662, "percentage": 77.3, "elapsed_time": "11:20:53", "remaining_time": "3:19:58"} +{"current_steps": 3130, "total_steps": 4048, "loss": 0.285343736410141, "lr": 2.6890949063601255e-06, "epoch": 1.5466452489806004, "percentage": 77.32, "elapsed_time": "11:21:05", "remaining_time": "3:19:45"} +{"current_steps": 3131, "total_steps": 4048, "loss": 0.25782397389411926, "lr": 2.6835227117419184e-06, "epoch": 1.5471395032744346, "percentage": 77.35, "elapsed_time": "11:21:19", "remaining_time": "3:19:32"} +{"current_steps": 3132, "total_steps": 4048, "loss": 0.26677054166793823, "lr": 2.67795540149941e-06, "epoch": 1.5476337575682688, "percentage": 77.37, "elapsed_time": "11:21:31", "remaining_time": "3:19:19"} +{"current_steps": 3133, "total_steps": 4048, "loss": 0.2696993052959442, "lr": 2.6723929793492555e-06, "epoch": 1.548128011862103, "percentage": 77.4, "elapsed_time": "11:21:45", "remaining_time": "3:19:06"} +{"current_steps": 3134, "total_steps": 4048, "loss": 0.2536013424396515, "lr": 2.66683544900485e-06, "epoch": 1.5486222661559372, "percentage": 77.42, "elapsed_time": "11:21:57", "remaining_time": "3:18:53"} +{"current_steps": 3135, "total_steps": 4048, "loss": 0.2583885192871094, "lr": 2.661282814176319e-06, "epoch": 1.5491165204497714, "percentage": 77.45, "elapsed_time": "11:22:11", "remaining_time": "3:18:40"} +{"current_steps": 3136, 
"total_steps": 4048, "loss": 0.24341340363025665, "lr": 2.655735078570528e-06, "epoch": 1.5496107747436056, "percentage": 77.47, "elapsed_time": "11:22:23", "remaining_time": "3:18:27"} +{"current_steps": 3137, "total_steps": 4048, "loss": 0.2575637698173523, "lr": 2.650192245891059e-06, "epoch": 1.5501050290374399, "percentage": 77.5, "elapsed_time": "11:22:36", "remaining_time": "3:18:14"} +{"current_steps": 3138, "total_steps": 4048, "loss": 0.24109753966331482, "lr": 2.644654319838227e-06, "epoch": 1.550599283331274, "percentage": 77.52, "elapsed_time": "11:22:49", "remaining_time": "3:18:00"} +{"current_steps": 3139, "total_steps": 4048, "loss": 0.246525377035141, "lr": 2.6391213041090822e-06, "epoch": 1.5510935376251083, "percentage": 77.54, "elapsed_time": "11:23:02", "remaining_time": "3:17:47"} +{"current_steps": 3140, "total_steps": 4048, "loss": 0.2589566111564636, "lr": 2.6335932023973777e-06, "epoch": 1.5515877919189422, "percentage": 77.57, "elapsed_time": "11:23:15", "remaining_time": "3:17:34"} +{"current_steps": 3141, "total_steps": 4048, "loss": 0.26198744773864746, "lr": 2.628070018393598e-06, "epoch": 1.5520820462127765, "percentage": 77.59, "elapsed_time": "11:23:28", "remaining_time": "3:17:21"} +{"current_steps": 3142, "total_steps": 4048, "loss": 0.22991782426834106, "lr": 2.622551755784942e-06, "epoch": 1.5525763005066107, "percentage": 77.62, "elapsed_time": "11:23:41", "remaining_time": "3:17:08"} +{"current_steps": 3143, "total_steps": 4048, "loss": 0.22211629152297974, "lr": 2.6170384182553244e-06, "epoch": 1.5530705548004449, "percentage": 77.64, "elapsed_time": "11:23:54", "remaining_time": "3:16:55"} +{"current_steps": 3144, "total_steps": 4048, "loss": 0.2665289640426636, "lr": 2.6115300094853666e-06, "epoch": 1.5535648090942789, "percentage": 77.67, "elapsed_time": "11:24:07", "remaining_time": "3:16:42"} +{"current_steps": 3145, "total_steps": 4048, "loss": 0.20211085677146912, "lr": 2.6060265331524114e-06, "epoch": 
1.554059063388113, "percentage": 77.69, "elapsed_time": "11:24:20", "remaining_time": "3:16:29"} +{"current_steps": 3146, "total_steps": 4048, "loss": 0.24264919757843018, "lr": 2.6005279929304918e-06, "epoch": 1.5545533176819473, "percentage": 77.72, "elapsed_time": "11:24:34", "remaining_time": "3:16:16"} +{"current_steps": 3147, "total_steps": 4048, "loss": 0.2722601294517517, "lr": 2.595034392490354e-06, "epoch": 1.5550475719757815, "percentage": 77.74, "elapsed_time": "11:24:46", "remaining_time": "3:16:03"} +{"current_steps": 3148, "total_steps": 4048, "loss": 0.26061201095581055, "lr": 2.58954573549946e-06, "epoch": 1.5555418262696157, "percentage": 77.77, "elapsed_time": "11:24:59", "remaining_time": "3:15:50"} +{"current_steps": 3149, "total_steps": 4048, "loss": 0.20620305836200714, "lr": 2.5840620256219464e-06, "epoch": 1.5560360805634499, "percentage": 77.79, "elapsed_time": "11:25:12", "remaining_time": "3:15:37"} +{"current_steps": 3150, "total_steps": 4048, "loss": 0.2424723207950592, "lr": 2.578583266518664e-06, "epoch": 1.556530334857284, "percentage": 77.82, "elapsed_time": "11:25:25", "remaining_time": "3:15:24"} +{"current_steps": 3151, "total_steps": 4048, "loss": 0.248019739985466, "lr": 2.573109461847153e-06, "epoch": 1.5570245891511183, "percentage": 77.84, "elapsed_time": "11:25:38", "remaining_time": "3:15:10"} +{"current_steps": 3152, "total_steps": 4048, "loss": 0.23162522912025452, "lr": 2.5676406152616483e-06, "epoch": 1.5575188434449525, "percentage": 77.87, "elapsed_time": "11:25:51", "remaining_time": "3:14:57"} +{"current_steps": 3153, "total_steps": 4048, "loss": 0.20099176466464996, "lr": 2.562176730413074e-06, "epoch": 1.5580130977387867, "percentage": 77.89, "elapsed_time": "11:26:04", "remaining_time": "3:14:44"} +{"current_steps": 3154, "total_steps": 4048, "loss": 0.27957430481910706, "lr": 2.5567178109490433e-06, "epoch": 1.558507352032621, "percentage": 77.92, "elapsed_time": "11:26:17", "remaining_time": "3:14:31"} 
+{"current_steps": 3155, "total_steps": 4048, "loss": 0.23941464722156525, "lr": 2.551263860513845e-06, "epoch": 1.5590016063264551, "percentage": 77.94, "elapsed_time": "11:26:29", "remaining_time": "3:14:18"} +{"current_steps": 3156, "total_steps": 4048, "loss": 0.24910275638103485, "lr": 2.5458148827484695e-06, "epoch": 1.559495860620289, "percentage": 77.96, "elapsed_time": "11:26:42", "remaining_time": "3:14:05"} +{"current_steps": 3157, "total_steps": 4048, "loss": 0.26430344581604004, "lr": 2.540370881290568e-06, "epoch": 1.5599901149141233, "percentage": 77.99, "elapsed_time": "11:26:55", "remaining_time": "3:13:52"} +{"current_steps": 3158, "total_steps": 4048, "loss": 0.2833614945411682, "lr": 2.534931859774481e-06, "epoch": 1.5604843692079575, "percentage": 78.01, "elapsed_time": "11:27:08", "remaining_time": "3:13:39"} +{"current_steps": 3159, "total_steps": 4048, "loss": 0.24630708992481232, "lr": 2.5294978218312215e-06, "epoch": 1.5609786235017915, "percentage": 78.04, "elapsed_time": "11:27:21", "remaining_time": "3:13:26"} +{"current_steps": 3160, "total_steps": 4048, "loss": 0.2674857974052429, "lr": 2.524068771088476e-06, "epoch": 1.5614728777956257, "percentage": 78.06, "elapsed_time": "11:27:34", "remaining_time": "3:13:13"} +{"current_steps": 3161, "total_steps": 4048, "loss": 0.23531441390514374, "lr": 2.5186447111706005e-06, "epoch": 1.56196713208946, "percentage": 78.09, "elapsed_time": "11:27:48", "remaining_time": "3:13:00"} +{"current_steps": 3162, "total_steps": 4048, "loss": 0.2603223919868469, "lr": 2.5132256456986236e-06, "epoch": 1.5624613863832941, "percentage": 78.11, "elapsed_time": "11:28:00", "remaining_time": "3:12:47"} +{"current_steps": 3163, "total_steps": 4048, "loss": 0.220007985830307, "lr": 2.5078115782902267e-06, "epoch": 1.5629556406771283, "percentage": 78.14, "elapsed_time": "11:28:14", "remaining_time": "3:12:34"} +{"current_steps": 3164, "total_steps": 4048, "loss": 0.22660651803016663, "lr": 2.502402512559773e-06, 
"epoch": 1.5634498949709625, "percentage": 78.16, "elapsed_time": "11:28:27", "remaining_time": "3:12:20"} +{"current_steps": 3165, "total_steps": 4048, "loss": 0.26425695419311523, "lr": 2.4969984521182766e-06, "epoch": 1.5639441492647967, "percentage": 78.19, "elapsed_time": "11:28:40", "remaining_time": "3:12:07"} +{"current_steps": 3166, "total_steps": 4048, "loss": 0.22870787978172302, "lr": 2.4915994005734057e-06, "epoch": 1.564438403558631, "percentage": 78.21, "elapsed_time": "11:28:53", "remaining_time": "3:11:54"} +{"current_steps": 3167, "total_steps": 4048, "loss": 0.25734084844589233, "lr": 2.48620536152949e-06, "epoch": 1.5649326578524652, "percentage": 78.24, "elapsed_time": "11:29:06", "remaining_time": "3:11:41"} +{"current_steps": 3168, "total_steps": 4048, "loss": 0.24831843376159668, "lr": 2.4808163385875226e-06, "epoch": 1.5654269121462994, "percentage": 78.26, "elapsed_time": "11:29:19", "remaining_time": "3:11:28"} +{"current_steps": 3169, "total_steps": 4048, "loss": 0.2389685958623886, "lr": 2.4754323353451284e-06, "epoch": 1.5659211664401336, "percentage": 78.29, "elapsed_time": "11:29:32", "remaining_time": "3:11:15"} +{"current_steps": 3170, "total_steps": 4048, "loss": 0.24750663340091705, "lr": 2.4700533553965946e-06, "epoch": 1.5664154207339678, "percentage": 78.31, "elapsed_time": "11:29:44", "remaining_time": "3:11:02"} +{"current_steps": 3171, "total_steps": 4048, "loss": 0.2689003348350525, "lr": 2.4646794023328525e-06, "epoch": 1.5669096750278018, "percentage": 78.33, "elapsed_time": "11:29:58", "remaining_time": "3:10:49"} +{"current_steps": 3172, "total_steps": 4048, "loss": 0.2574145197868347, "lr": 2.45931047974147e-06, "epoch": 1.567403929321636, "percentage": 78.36, "elapsed_time": "11:30:10", "remaining_time": "3:10:36"} +{"current_steps": 3173, "total_steps": 4048, "loss": 0.2586211562156677, "lr": 2.4539465912066706e-06, "epoch": 1.5678981836154702, "percentage": 78.38, "elapsed_time": "11:30:23", "remaining_time": 
"3:10:23"} +{"current_steps": 3174, "total_steps": 4048, "loss": 0.26383671164512634, "lr": 2.4485877403093095e-06, "epoch": 1.5683924379093044, "percentage": 78.41, "elapsed_time": "11:30:36", "remaining_time": "3:10:10"} +{"current_steps": 3175, "total_steps": 4048, "loss": 0.28196123242378235, "lr": 2.4432339306268736e-06, "epoch": 1.5688866922031384, "percentage": 78.43, "elapsed_time": "11:30:49", "remaining_time": "3:09:56"} +{"current_steps": 3176, "total_steps": 4048, "loss": 0.2736835181713104, "lr": 2.4378851657334923e-06, "epoch": 1.5693809464969726, "percentage": 78.46, "elapsed_time": "11:31:02", "remaining_time": "3:09:43"} +{"current_steps": 3177, "total_steps": 4048, "loss": 0.2316201627254486, "lr": 2.4325414491999255e-06, "epoch": 1.5698752007908068, "percentage": 78.48, "elapsed_time": "11:31:15", "remaining_time": "3:09:30"} +{"current_steps": 3178, "total_steps": 4048, "loss": 0.23955810070037842, "lr": 2.427202784593562e-06, "epoch": 1.570369455084641, "percentage": 78.51, "elapsed_time": "11:31:28", "remaining_time": "3:09:17"} +{"current_steps": 3179, "total_steps": 4048, "loss": 0.263042151927948, "lr": 2.4218691754784162e-06, "epoch": 1.5708637093784752, "percentage": 78.53, "elapsed_time": "11:31:41", "remaining_time": "3:09:04"} +{"current_steps": 3180, "total_steps": 4048, "loss": 0.25570976734161377, "lr": 2.4165406254151312e-06, "epoch": 1.5713579636723094, "percentage": 78.56, "elapsed_time": "11:31:54", "remaining_time": "3:08:51"} +{"current_steps": 3181, "total_steps": 4048, "loss": 0.2503488063812256, "lr": 2.4112171379609696e-06, "epoch": 1.5718522179661436, "percentage": 78.58, "elapsed_time": "11:32:07", "remaining_time": "3:08:38"} +{"current_steps": 3182, "total_steps": 4048, "loss": 0.21815824508666992, "lr": 2.40589871666982e-06, "epoch": 1.5723464722599778, "percentage": 78.61, "elapsed_time": "11:32:20", "remaining_time": "3:08:25"} +{"current_steps": 3183, "total_steps": 4048, "loss": 0.23936739563941956, "lr": 
2.400585365092177e-06, "epoch": 1.572840726553812, "percentage": 78.63, "elapsed_time": "11:32:33", "remaining_time": "3:08:12"} +{"current_steps": 3184, "total_steps": 4048, "loss": 0.2618086636066437, "lr": 2.3952770867751595e-06, "epoch": 1.5733349808476462, "percentage": 78.66, "elapsed_time": "11:32:46", "remaining_time": "3:07:59"} +{"current_steps": 3185, "total_steps": 4048, "loss": 0.2852020263671875, "lr": 2.3899738852625065e-06, "epoch": 1.5738292351414804, "percentage": 78.68, "elapsed_time": "11:32:59", "remaining_time": "3:07:46"} +{"current_steps": 3186, "total_steps": 4048, "loss": 0.28860047459602356, "lr": 2.3846757640945505e-06, "epoch": 1.5743234894353144, "percentage": 78.71, "elapsed_time": "11:33:12", "remaining_time": "3:07:33"} +{"current_steps": 3187, "total_steps": 4048, "loss": 0.2397383451461792, "lr": 2.3793827268082446e-06, "epoch": 1.5748177437291486, "percentage": 78.73, "elapsed_time": "11:33:25", "remaining_time": "3:07:20"} +{"current_steps": 3188, "total_steps": 4048, "loss": 0.25204962491989136, "lr": 2.374094776937145e-06, "epoch": 1.5753119980229828, "percentage": 78.75, "elapsed_time": "11:33:38", "remaining_time": "3:07:07"} +{"current_steps": 3189, "total_steps": 4048, "loss": 0.21216189861297607, "lr": 2.368811918011411e-06, "epoch": 1.575806252316817, "percentage": 78.78, "elapsed_time": "11:33:51", "remaining_time": "3:06:54"} +{"current_steps": 3190, "total_steps": 4048, "loss": 0.2647620737552643, "lr": 2.363534153557805e-06, "epoch": 1.576300506610651, "percentage": 78.8, "elapsed_time": "11:34:04", "remaining_time": "3:06:40"} +{"current_steps": 3191, "total_steps": 4048, "loss": 0.3079666793346405, "lr": 2.358261487099688e-06, "epoch": 1.5767947609044852, "percentage": 78.83, "elapsed_time": "11:34:17", "remaining_time": "3:06:27"} +{"current_steps": 3192, "total_steps": 4048, "loss": 0.22961711883544922, "lr": 2.352993922157013e-06, "epoch": 1.5772890151983194, "percentage": 78.85, "elapsed_time": "11:34:30", 
"remaining_time": "3:06:14"} +{"current_steps": 3193, "total_steps": 4048, "loss": 0.2657305598258972, "lr": 2.347731462246331e-06, "epoch": 1.5777832694921536, "percentage": 78.88, "elapsed_time": "11:34:43", "remaining_time": "3:06:01"} +{"current_steps": 3194, "total_steps": 4048, "loss": 0.224237859249115, "lr": 2.3424741108807914e-06, "epoch": 1.5782775237859878, "percentage": 78.9, "elapsed_time": "11:34:56", "remaining_time": "3:05:48"} +{"current_steps": 3195, "total_steps": 4048, "loss": 0.26459985971450806, "lr": 2.337221871570121e-06, "epoch": 1.578771778079822, "percentage": 78.93, "elapsed_time": "11:35:10", "remaining_time": "3:05:35"} +{"current_steps": 3196, "total_steps": 4048, "loss": 0.25391027331352234, "lr": 2.331974747820641e-06, "epoch": 1.5792660323736563, "percentage": 78.95, "elapsed_time": "11:35:22", "remaining_time": "3:05:22"} +{"current_steps": 3197, "total_steps": 4048, "loss": 0.25822141766548157, "lr": 2.326732743135256e-06, "epoch": 1.5797602866674905, "percentage": 78.98, "elapsed_time": "11:35:35", "remaining_time": "3:05:09"} +{"current_steps": 3198, "total_steps": 4048, "loss": 0.25140073895454407, "lr": 2.3214958610134554e-06, "epoch": 1.5802545409613247, "percentage": 79.0, "elapsed_time": "11:35:49", "remaining_time": "3:04:56"} +{"current_steps": 3199, "total_steps": 4048, "loss": 0.2550397515296936, "lr": 2.3162641049513035e-06, "epoch": 1.5807487952551589, "percentage": 79.03, "elapsed_time": "11:36:02", "remaining_time": "3:04:43"} +{"current_steps": 3200, "total_steps": 4048, "loss": 0.2648996412754059, "lr": 2.3110374784414526e-06, "epoch": 1.581243049548993, "percentage": 79.05, "elapsed_time": "11:36:14", "remaining_time": "3:04:30"} +{"current_steps": 3201, "total_steps": 4048, "loss": 0.235626682639122, "lr": 2.3058159849731134e-06, "epoch": 1.5817373038428273, "percentage": 79.08, "elapsed_time": "11:36:33", "remaining_time": "3:04:18"} +{"current_steps": 3202, "total_steps": 4048, "loss": 0.24930328130722046, 
"lr": 2.3005996280320873e-06, "epoch": 1.5822315581366613, "percentage": 79.1, "elapsed_time": "11:36:46", "remaining_time": "3:04:05"} +{"current_steps": 3203, "total_steps": 4048, "loss": 0.23712117969989777, "lr": 2.2953884111007428e-06, "epoch": 1.5827258124304955, "percentage": 79.13, "elapsed_time": "11:36:58", "remaining_time": "3:03:52"} +{"current_steps": 3204, "total_steps": 4048, "loss": 0.2504096031188965, "lr": 2.290182337658007e-06, "epoch": 1.5832200667243297, "percentage": 79.15, "elapsed_time": "11:37:12", "remaining_time": "3:03:39"} +{"current_steps": 3205, "total_steps": 4048, "loss": 0.2218465358018875, "lr": 2.2849814111793823e-06, "epoch": 1.583714321018164, "percentage": 79.17, "elapsed_time": "11:37:24", "remaining_time": "3:03:26"} +{"current_steps": 3206, "total_steps": 4048, "loss": 0.2653011977672577, "lr": 2.279785635136933e-06, "epoch": 1.5842085753119979, "percentage": 79.2, "elapsed_time": "11:37:37", "remaining_time": "3:03:13"} +{"current_steps": 3207, "total_steps": 4048, "loss": 0.27551597356796265, "lr": 2.2745950129992853e-06, "epoch": 1.584702829605832, "percentage": 79.22, "elapsed_time": "11:37:50", "remaining_time": "3:03:00"} +{"current_steps": 3208, "total_steps": 4048, "loss": 0.21494519710540771, "lr": 2.2694095482316247e-06, "epoch": 1.5851970838996663, "percentage": 79.25, "elapsed_time": "11:38:03", "remaining_time": "3:02:46"} +{"current_steps": 3209, "total_steps": 4048, "loss": 0.2517405152320862, "lr": 2.2642292442956925e-06, "epoch": 1.5856913381935005, "percentage": 79.27, "elapsed_time": "11:38:15", "remaining_time": "3:02:33"} +{"current_steps": 3210, "total_steps": 4048, "loss": 0.25777050852775574, "lr": 2.259054104649786e-06, "epoch": 1.5861855924873347, "percentage": 79.3, "elapsed_time": "11:38:28", "remaining_time": "3:02:20"} +{"current_steps": 3211, "total_steps": 4048, "loss": 0.25914469361305237, "lr": 2.2538841327487582e-06, "epoch": 1.586679846781169, "percentage": 79.32, "elapsed_time": 
"11:38:41", "remaining_time": "3:02:07"} +{"current_steps": 3212, "total_steps": 4048, "loss": 0.23877818882465363, "lr": 2.2487193320440017e-06, "epoch": 1.5871741010750031, "percentage": 79.35, "elapsed_time": "11:38:53", "remaining_time": "3:01:54"} +{"current_steps": 3213, "total_steps": 4048, "loss": 0.2226967066526413, "lr": 2.2435597059834635e-06, "epoch": 1.5876683553688373, "percentage": 79.37, "elapsed_time": "11:39:06", "remaining_time": "3:01:41"} +{"current_steps": 3214, "total_steps": 4048, "loss": 0.28768399357795715, "lr": 2.2384052580116465e-06, "epoch": 1.5881626096626715, "percentage": 79.4, "elapsed_time": "11:39:19", "remaining_time": "3:01:27"} +{"current_steps": 3215, "total_steps": 4048, "loss": 0.2563883662223816, "lr": 2.233255991569575e-06, "epoch": 1.5886568639565057, "percentage": 79.42, "elapsed_time": "11:39:32", "remaining_time": "3:01:14"} +{"current_steps": 3216, "total_steps": 4048, "loss": 0.2595394551753998, "lr": 2.2281119100948322e-06, "epoch": 1.58915111825034, "percentage": 79.45, "elapsed_time": "11:39:44", "remaining_time": "3:01:01"} +{"current_steps": 3217, "total_steps": 4048, "loss": 0.2354460060596466, "lr": 2.2229730170215324e-06, "epoch": 1.589645372544174, "percentage": 79.47, "elapsed_time": "11:39:57", "remaining_time": "3:00:48"} +{"current_steps": 3218, "total_steps": 4048, "loss": 0.2397463619709015, "lr": 2.2178393157803225e-06, "epoch": 1.5901396268380081, "percentage": 79.5, "elapsed_time": "11:40:09", "remaining_time": "3:00:35"} +{"current_steps": 3219, "total_steps": 4048, "loss": 0.3304588794708252, "lr": 2.212710809798393e-06, "epoch": 1.5906338811318423, "percentage": 79.52, "elapsed_time": "11:40:22", "remaining_time": "3:00:22"} +{"current_steps": 3220, "total_steps": 4048, "loss": 0.23891952633857727, "lr": 2.207587502499464e-06, "epoch": 1.5911281354256765, "percentage": 79.55, "elapsed_time": "11:40:34", "remaining_time": "3:00:08"} +{"current_steps": 3221, "total_steps": 4048, "loss": 
0.2544774115085602, "lr": 2.2024693973037747e-06, "epoch": 1.5916223897195105, "percentage": 79.57, "elapsed_time": "11:40:46", "remaining_time": "2:59:55"} +{"current_steps": 3222, "total_steps": 4048, "loss": 0.2620859444141388, "lr": 2.1973564976281003e-06, "epoch": 1.5921166440133447, "percentage": 79.59, "elapsed_time": "11:40:59", "remaining_time": "2:59:42"} +{"current_steps": 3223, "total_steps": 4048, "loss": 0.22541281580924988, "lr": 2.192248806885747e-06, "epoch": 1.592610898307179, "percentage": 79.62, "elapsed_time": "11:41:11", "remaining_time": "2:59:29"} +{"current_steps": 3224, "total_steps": 4048, "loss": 0.23454351723194122, "lr": 2.187146328486529e-06, "epoch": 1.5931051526010132, "percentage": 79.64, "elapsed_time": "11:41:24", "remaining_time": "2:59:16"} +{"current_steps": 3225, "total_steps": 4048, "loss": 0.24848732352256775, "lr": 2.18204906583679e-06, "epoch": 1.5935994068948474, "percentage": 79.67, "elapsed_time": "11:41:36", "remaining_time": "2:59:02"} +{"current_steps": 3226, "total_steps": 4048, "loss": 0.21949590742588043, "lr": 2.176957022339389e-06, "epoch": 1.5940936611886816, "percentage": 79.69, "elapsed_time": "11:41:48", "remaining_time": "2:58:49"} +{"current_steps": 3227, "total_steps": 4048, "loss": 0.27300944924354553, "lr": 2.171870201393703e-06, "epoch": 1.5945879154825158, "percentage": 79.72, "elapsed_time": "11:42:01", "remaining_time": "2:58:36"} +{"current_steps": 3228, "total_steps": 4048, "loss": 0.23850613832473755, "lr": 2.16678860639562e-06, "epoch": 1.59508216977635, "percentage": 79.74, "elapsed_time": "11:42:14", "remaining_time": "2:58:23"} +{"current_steps": 3229, "total_steps": 4048, "loss": 0.2728792428970337, "lr": 2.1617122407375424e-06, "epoch": 1.5955764240701842, "percentage": 79.77, "elapsed_time": "11:42:27", "remaining_time": "2:58:10"} +{"current_steps": 3230, "total_steps": 4048, "loss": 0.2321755588054657, "lr": 2.1566411078083726e-06, "epoch": 1.5960706783640184, "percentage": 79.79, 
"elapsed_time": "11:42:40", "remaining_time": "2:57:57"} +{"current_steps": 3231, "total_steps": 4048, "loss": 0.30118101835250854, "lr": 2.1515752109935374e-06, "epoch": 1.5965649326578526, "percentage": 79.82, "elapsed_time": "11:42:52", "remaining_time": "2:57:43"} +{"current_steps": 3232, "total_steps": 4048, "loss": 0.22317390143871307, "lr": 2.1465145536749475e-06, "epoch": 1.5970591869516868, "percentage": 79.84, "elapsed_time": "11:43:06", "remaining_time": "2:57:30"} +{"current_steps": 3233, "total_steps": 4048, "loss": 0.2906285524368286, "lr": 2.141459139231029e-06, "epoch": 1.5975534412455208, "percentage": 79.87, "elapsed_time": "11:43:18", "remaining_time": "2:57:17"} +{"current_steps": 3234, "total_steps": 4048, "loss": 0.24645069241523743, "lr": 2.136408971036704e-06, "epoch": 1.598047695539355, "percentage": 79.89, "elapsed_time": "11:43:31", "remaining_time": "2:57:04"} +{"current_steps": 3235, "total_steps": 4048, "loss": 0.26764121651649475, "lr": 2.1313640524633927e-06, "epoch": 1.5985419498331892, "percentage": 79.92, "elapsed_time": "11:43:44", "remaining_time": "2:56:51"} +{"current_steps": 3236, "total_steps": 4048, "loss": 0.2811397910118103, "lr": 2.126324386879012e-06, "epoch": 1.5990362041270234, "percentage": 79.94, "elapsed_time": "11:43:57", "remaining_time": "2:56:38"} +{"current_steps": 3237, "total_steps": 4048, "loss": 0.254316508769989, "lr": 2.121289977647971e-06, "epoch": 1.5995304584208574, "percentage": 79.97, "elapsed_time": "11:44:09", "remaining_time": "2:56:25"} +{"current_steps": 3238, "total_steps": 4048, "loss": 0.2479352205991745, "lr": 2.1162608281311636e-06, "epoch": 1.6000247127146916, "percentage": 79.99, "elapsed_time": "11:44:21", "remaining_time": "2:56:11"} +{"current_steps": 3239, "total_steps": 4048, "loss": 0.22767537832260132, "lr": 2.1112369416859847e-06, "epoch": 1.6005189670085258, "percentage": 80.01, "elapsed_time": "11:44:34", "remaining_time": "2:55:58"} +{"current_steps": 3240, "total_steps": 
4048, "loss": 0.24286411702632904, "lr": 2.106218321666309e-06, "epoch": 1.60101322130236, "percentage": 80.04, "elapsed_time": "11:44:47", "remaining_time": "2:55:45"} +{"current_steps": 3241, "total_steps": 4048, "loss": 0.22960595786571503, "lr": 2.1012049714224914e-06, "epoch": 1.6015074755961942, "percentage": 80.06, "elapsed_time": "11:44:59", "remaining_time": "2:55:32"} +{"current_steps": 3242, "total_steps": 4048, "loss": 0.2448965162038803, "lr": 2.0961968943013742e-06, "epoch": 1.6020017298900284, "percentage": 80.09, "elapsed_time": "11:45:12", "remaining_time": "2:55:19"} +{"current_steps": 3243, "total_steps": 4048, "loss": 0.23486846685409546, "lr": 2.0911940936462794e-06, "epoch": 1.6024959841838626, "percentage": 80.11, "elapsed_time": "11:45:24", "remaining_time": "2:55:06"} +{"current_steps": 3244, "total_steps": 4048, "loss": 0.2470572590827942, "lr": 2.0861965727970045e-06, "epoch": 1.6029902384776968, "percentage": 80.14, "elapsed_time": "11:45:37", "remaining_time": "2:54:53"} +{"current_steps": 3245, "total_steps": 4048, "loss": 0.283765971660614, "lr": 2.0812043350898226e-06, "epoch": 1.603484492771531, "percentage": 80.16, "elapsed_time": "11:45:50", "remaining_time": "2:54:39"} +{"current_steps": 3246, "total_steps": 4048, "loss": 0.24943199753761292, "lr": 2.076217383857484e-06, "epoch": 1.6039787470653653, "percentage": 80.19, "elapsed_time": "11:46:03", "remaining_time": "2:54:26"} +{"current_steps": 3247, "total_steps": 4048, "loss": 0.22150146961212158, "lr": 2.0712357224291966e-06, "epoch": 1.6044730013591995, "percentage": 80.21, "elapsed_time": "11:46:16", "remaining_time": "2:54:13"} +{"current_steps": 3248, "total_steps": 4048, "loss": 0.2610163390636444, "lr": 2.0662593541306563e-06, "epoch": 1.6049672556530334, "percentage": 80.24, "elapsed_time": "11:46:28", "remaining_time": "2:54:00"} +{"current_steps": 3249, "total_steps": 4048, "loss": 0.22789397835731506, "lr": 2.0612882822840154e-06, "epoch": 1.6054615099468676, 
"percentage": 80.26, "elapsed_time": "11:46:42", "remaining_time": "2:53:47"} +{"current_steps": 3250, "total_steps": 4048, "loss": 0.22956407070159912, "lr": 2.056322510207882e-06, "epoch": 1.6059557642407019, "percentage": 80.29, "elapsed_time": "11:46:54", "remaining_time": "2:53:34"} +{"current_steps": 3251, "total_steps": 4048, "loss": 0.2579299509525299, "lr": 2.051362041217341e-06, "epoch": 1.606450018534536, "percentage": 80.31, "elapsed_time": "11:47:07", "remaining_time": "2:53:21"} +{"current_steps": 3252, "total_steps": 4048, "loss": 0.24655218422412872, "lr": 2.046406878623929e-06, "epoch": 1.60694427282837, "percentage": 80.34, "elapsed_time": "11:47:20", "remaining_time": "2:53:08"} +{"current_steps": 3253, "total_steps": 4048, "loss": 0.2325882464647293, "lr": 2.0414570257356415e-06, "epoch": 1.6074385271222043, "percentage": 80.36, "elapsed_time": "11:47:32", "remaining_time": "2:52:55"} +{"current_steps": 3254, "total_steps": 4048, "loss": 0.2678581476211548, "lr": 2.0365124858569294e-06, "epoch": 1.6079327814160385, "percentage": 80.39, "elapsed_time": "11:47:45", "remaining_time": "2:52:41"} +{"current_steps": 3255, "total_steps": 4048, "loss": 0.23200136423110962, "lr": 2.0315732622886976e-06, "epoch": 1.6084270357098727, "percentage": 80.41, "elapsed_time": "11:47:57", "remaining_time": "2:52:28"} +{"current_steps": 3256, "total_steps": 4048, "loss": 0.24957536160945892, "lr": 2.0266393583283015e-06, "epoch": 1.6089212900037069, "percentage": 80.43, "elapsed_time": "11:48:10", "remaining_time": "2:52:15"} +{"current_steps": 3257, "total_steps": 4048, "loss": 0.2506657540798187, "lr": 2.0217107772695467e-06, "epoch": 1.609415544297541, "percentage": 80.46, "elapsed_time": "11:48:22", "remaining_time": "2:52:02"} +{"current_steps": 3258, "total_steps": 4048, "loss": 0.22255182266235352, "lr": 2.0167875224026788e-06, "epoch": 1.6099097985913753, "percentage": 80.48, "elapsed_time": "11:48:36", "remaining_time": "2:51:49"} +{"current_steps": 3259, 
"total_steps": 4048, "loss": 0.2489611655473709, "lr": 2.011869597014392e-06, "epoch": 1.6104040528852095, "percentage": 80.51, "elapsed_time": "11:48:48", "remaining_time": "2:51:36"} +{"current_steps": 3260, "total_steps": 4048, "loss": 0.24808533489704132, "lr": 2.0069570043878305e-06, "epoch": 1.6108983071790437, "percentage": 80.53, "elapsed_time": "11:49:02", "remaining_time": "2:51:23"} +{"current_steps": 3261, "total_steps": 4048, "loss": 0.24013441801071167, "lr": 2.0020497478025635e-06, "epoch": 1.611392561472878, "percentage": 80.56, "elapsed_time": "11:49:15", "remaining_time": "2:51:10"} +{"current_steps": 3262, "total_steps": 4048, "loss": 0.2813841998577118, "lr": 1.997147830534608e-06, "epoch": 1.6118868157667121, "percentage": 80.58, "elapsed_time": "11:49:28", "remaining_time": "2:50:57"} +{"current_steps": 3263, "total_steps": 4048, "loss": 0.23727375268936157, "lr": 1.9922512558564154e-06, "epoch": 1.612381070060546, "percentage": 80.61, "elapsed_time": "11:49:40", "remaining_time": "2:50:43"} +{"current_steps": 3264, "total_steps": 4048, "loss": 0.2341655194759369, "lr": 1.9873600270368664e-06, "epoch": 1.6128753243543803, "percentage": 80.63, "elapsed_time": "11:49:53", "remaining_time": "2:50:30"} +{"current_steps": 3265, "total_steps": 4048, "loss": 0.32069963216781616, "lr": 1.9824741473412768e-06, "epoch": 1.6133695786482145, "percentage": 80.66, "elapsed_time": "11:50:06", "remaining_time": "2:50:17"} +{"current_steps": 3266, "total_steps": 4048, "loss": 0.2414681762456894, "lr": 1.977593620031393e-06, "epoch": 1.6138638329420487, "percentage": 80.68, "elapsed_time": "11:50:19", "remaining_time": "2:50:04"} +{"current_steps": 3267, "total_steps": 4048, "loss": 0.25517842173576355, "lr": 1.9727184483653793e-06, "epoch": 1.6143580872358827, "percentage": 80.71, "elapsed_time": "11:50:32", "remaining_time": "2:49:51"} +{"current_steps": 3268, "total_steps": 4048, "loss": 0.28264889121055603, "lr": 1.967848635597831e-06, "epoch": 
1.614852341529717, "percentage": 80.73, "elapsed_time": "11:50:45", "remaining_time": "2:49:38"} +{"current_steps": 3269, "total_steps": 4048, "loss": 0.24543075263500214, "lr": 1.962984184979774e-06, "epoch": 1.6153465958235511, "percentage": 80.76, "elapsed_time": "11:50:58", "remaining_time": "2:49:25"} +{"current_steps": 3270, "total_steps": 4048, "loss": 0.2770763039588928, "lr": 1.9581250997586366e-06, "epoch": 1.6158408501173853, "percentage": 80.78, "elapsed_time": "11:51:10", "remaining_time": "2:49:12"} +{"current_steps": 3271, "total_steps": 4048, "loss": 0.2521423101425171, "lr": 1.953271383178278e-06, "epoch": 1.6163351044112195, "percentage": 80.81, "elapsed_time": "11:51:23", "remaining_time": "2:48:59"} +{"current_steps": 3272, "total_steps": 4048, "loss": 0.2402455359697342, "lr": 1.9484230384789702e-06, "epoch": 1.6168293587050537, "percentage": 80.83, "elapsed_time": "11:51:36", "remaining_time": "2:48:45"} +{"current_steps": 3273, "total_steps": 4048, "loss": 0.2947021424770355, "lr": 1.9435800688974005e-06, "epoch": 1.617323612998888, "percentage": 80.85, "elapsed_time": "11:51:49", "remaining_time": "2:48:32"} +{"current_steps": 3274, "total_steps": 4048, "loss": 0.22238701581954956, "lr": 1.938742477666663e-06, "epoch": 1.6178178672927221, "percentage": 80.88, "elapsed_time": "11:52:02", "remaining_time": "2:48:19"} +{"current_steps": 3275, "total_steps": 4048, "loss": 0.25475019216537476, "lr": 1.933910268016269e-06, "epoch": 1.6183121215865564, "percentage": 80.9, "elapsed_time": "11:52:15", "remaining_time": "2:48:06"} +{"current_steps": 3276, "total_steps": 4048, "loss": 0.2316315472126007, "lr": 1.929083443172125e-06, "epoch": 1.6188063758803906, "percentage": 80.93, "elapsed_time": "11:52:27", "remaining_time": "2:47:53"} +{"current_steps": 3277, "total_steps": 4048, "loss": 0.24977952241897583, "lr": 1.9242620063565598e-06, "epoch": 1.6193006301742248, "percentage": 80.95, "elapsed_time": "11:52:41", "remaining_time": "2:47:40"} 
+{"current_steps": 3278, "total_steps": 4048, "loss": 0.24006152153015137, "lr": 1.9194459607882887e-06, "epoch": 1.619794884468059, "percentage": 80.98, "elapsed_time": "11:52:53", "remaining_time": "2:47:27"} +{"current_steps": 3279, "total_steps": 4048, "loss": 0.26050522923469543, "lr": 1.9146353096824366e-06, "epoch": 1.620289138761893, "percentage": 81.0, "elapsed_time": "11:53:06", "remaining_time": "2:47:14"} +{"current_steps": 3280, "total_steps": 4048, "loss": 0.2698773443698883, "lr": 1.9098300562505266e-06, "epoch": 1.6207833930557272, "percentage": 81.03, "elapsed_time": "11:53:19", "remaining_time": "2:47:01"} +{"current_steps": 3281, "total_steps": 4048, "loss": 0.2627784013748169, "lr": 1.9050302037004765e-06, "epoch": 1.6212776473495614, "percentage": 81.05, "elapsed_time": "11:53:31", "remaining_time": "2:46:48"} +{"current_steps": 3282, "total_steps": 4048, "loss": 0.24261148273944855, "lr": 1.900235755236599e-06, "epoch": 1.6217719016433956, "percentage": 81.08, "elapsed_time": "11:53:44", "remaining_time": "2:46:34"} +{"current_steps": 3283, "total_steps": 4048, "loss": 0.24689635634422302, "lr": 1.8954467140596023e-06, "epoch": 1.6222661559372296, "percentage": 81.1, "elapsed_time": "11:53:56", "remaining_time": "2:46:21"} +{"current_steps": 3284, "total_steps": 4048, "loss": 0.2885867953300476, "lr": 1.890663083366574e-06, "epoch": 1.6227604102310638, "percentage": 81.13, "elapsed_time": "11:54:09", "remaining_time": "2:46:08"} +{"current_steps": 3285, "total_steps": 4048, "loss": 0.2624407112598419, "lr": 1.8858848663510066e-06, "epoch": 1.623254664524898, "percentage": 81.15, "elapsed_time": "11:54:22", "remaining_time": "2:45:55"} +{"current_steps": 3286, "total_steps": 4048, "loss": 0.27705928683280945, "lr": 1.881112066202767e-06, "epoch": 1.6237489188187322, "percentage": 81.18, "elapsed_time": "11:54:35", "remaining_time": "2:45:42"} +{"current_steps": 3287, "total_steps": 4048, "loss": 0.26406094431877136, "lr": 
1.8763446861081058e-06, "epoch": 1.6242431731125664, "percentage": 81.2, "elapsed_time": "11:54:47", "remaining_time": "2:45:29"} +{"current_steps": 3288, "total_steps": 4048, "loss": 0.26495790481567383, "lr": 1.8715827292496557e-06, "epoch": 1.6247374274064006, "percentage": 81.23, "elapsed_time": "11:55:00", "remaining_time": "2:45:16"} +{"current_steps": 3289, "total_steps": 4048, "loss": 0.24995195865631104, "lr": 1.8668261988064406e-06, "epoch": 1.6252316817002348, "percentage": 81.25, "elapsed_time": "11:55:13", "remaining_time": "2:45:03"} +{"current_steps": 3290, "total_steps": 4048, "loss": 0.23043034970760345, "lr": 1.8620750979538437e-06, "epoch": 1.625725935994069, "percentage": 81.27, "elapsed_time": "11:55:26", "remaining_time": "2:44:50"} +{"current_steps": 3291, "total_steps": 4048, "loss": 0.2590731978416443, "lr": 1.8573294298636334e-06, "epoch": 1.6262201902879032, "percentage": 81.3, "elapsed_time": "11:55:38", "remaining_time": "2:44:36"} +{"current_steps": 3292, "total_steps": 4048, "loss": 0.24246811866760254, "lr": 1.8525891977039557e-06, "epoch": 1.6267144445817374, "percentage": 81.32, "elapsed_time": "11:55:51", "remaining_time": "2:44:23"} +{"current_steps": 3293, "total_steps": 4048, "loss": 0.2386825680732727, "lr": 1.847854404639311e-06, "epoch": 1.6272086988755716, "percentage": 81.35, "elapsed_time": "11:56:04", "remaining_time": "2:44:10"} +{"current_steps": 3294, "total_steps": 4048, "loss": 0.2243885099887848, "lr": 1.843125053830588e-06, "epoch": 1.6277029531694056, "percentage": 81.37, "elapsed_time": "11:56:17", "remaining_time": "2:43:57"} +{"current_steps": 3295, "total_steps": 4048, "loss": 0.24984796345233917, "lr": 1.838401148435035e-06, "epoch": 1.6281972074632398, "percentage": 81.4, "elapsed_time": "11:56:30", "remaining_time": "2:43:44"} +{"current_steps": 3296, "total_steps": 4048, "loss": 0.22784638404846191, "lr": 1.8336826916062568e-06, "epoch": 1.628691461757074, "percentage": 81.42, "elapsed_time": "11:56:42", 
"remaining_time": "2:43:31"} +{"current_steps": 3297, "total_steps": 4048, "loss": 0.24812597036361694, "lr": 1.828969686494232e-06, "epoch": 1.6291857160509082, "percentage": 81.45, "elapsed_time": "11:56:55", "remaining_time": "2:43:18"} +{"current_steps": 3298, "total_steps": 4048, "loss": 0.234031543135643, "lr": 1.8242621362452939e-06, "epoch": 1.6296799703447422, "percentage": 81.47, "elapsed_time": "11:57:08", "remaining_time": "2:43:05"} +{"current_steps": 3299, "total_steps": 4048, "loss": 0.22455371916294098, "lr": 1.8195600440021377e-06, "epoch": 1.6301742246385764, "percentage": 81.5, "elapsed_time": "11:57:21", "remaining_time": "2:42:51"} +{"current_steps": 3300, "total_steps": 4048, "loss": 0.22605910897254944, "lr": 1.8148634129038113e-06, "epoch": 1.6306684789324106, "percentage": 81.52, "elapsed_time": "11:57:33", "remaining_time": "2:42:38"} +{"current_steps": 3301, "total_steps": 4048, "loss": 0.2527684271335602, "lr": 1.8101722460857184e-06, "epoch": 1.6311627332262448, "percentage": 81.55, "elapsed_time": "11:57:52", "remaining_time": "2:42:27"} +{"current_steps": 3302, "total_steps": 4048, "loss": 0.24625766277313232, "lr": 1.8054865466796167e-06, "epoch": 1.631656987520079, "percentage": 81.57, "elapsed_time": "11:58:04", "remaining_time": "2:42:13"} +{"current_steps": 3303, "total_steps": 4048, "loss": 0.31236231327056885, "lr": 1.8008063178136125e-06, "epoch": 1.6321512418139132, "percentage": 81.6, "elapsed_time": "11:58:17", "remaining_time": "2:42:00"} +{"current_steps": 3304, "total_steps": 4048, "loss": 0.21256005764007568, "lr": 1.7961315626121566e-06, "epoch": 1.6326454961077475, "percentage": 81.62, "elapsed_time": "11:58:29", "remaining_time": "2:41:47"} +{"current_steps": 3305, "total_steps": 4048, "loss": 0.25238949060440063, "lr": 1.7914622841960482e-06, "epoch": 1.6331397504015817, "percentage": 81.65, "elapsed_time": "11:58:42", "remaining_time": "2:41:34"} +{"current_steps": 3306, "total_steps": 4048, "loss": 
0.29630619287490845, "lr": 1.7867984856824382e-06, "epoch": 1.6336340046954159, "percentage": 81.67, "elapsed_time": "11:58:54", "remaining_time": "2:41:21"} +{"current_steps": 3307, "total_steps": 4048, "loss": 0.26159363985061646, "lr": 1.782140170184804e-06, "epoch": 1.63412825898925, "percentage": 81.69, "elapsed_time": "11:59:07", "remaining_time": "2:41:08"} +{"current_steps": 3308, "total_steps": 4048, "loss": 0.22361448407173157, "lr": 1.7774873408129733e-06, "epoch": 1.6346225132830843, "percentage": 81.72, "elapsed_time": "11:59:21", "remaining_time": "2:40:55"} +{"current_steps": 3309, "total_steps": 4048, "loss": 0.23890942335128784, "lr": 1.7728400006731083e-06, "epoch": 1.6351167675769185, "percentage": 81.74, "elapsed_time": "11:59:34", "remaining_time": "2:40:42"} +{"current_steps": 3310, "total_steps": 4048, "loss": 0.23067504167556763, "lr": 1.7681981528677073e-06, "epoch": 1.6356110218707525, "percentage": 81.77, "elapsed_time": "11:59:46", "remaining_time": "2:40:28"} +{"current_steps": 3311, "total_steps": 4048, "loss": 0.24790561199188232, "lr": 1.7635618004956012e-06, "epoch": 1.6361052761645867, "percentage": 81.79, "elapsed_time": "11:59:59", "remaining_time": "2:40:15"} +{"current_steps": 3312, "total_steps": 4048, "loss": 0.2590476870536804, "lr": 1.7589309466519556e-06, "epoch": 1.6365995304584209, "percentage": 81.82, "elapsed_time": "12:00:12", "remaining_time": "2:40:02"} +{"current_steps": 3313, "total_steps": 4048, "loss": 0.26833316683769226, "lr": 1.754305594428254e-06, "epoch": 1.637093784752255, "percentage": 81.84, "elapsed_time": "12:00:25", "remaining_time": "2:39:49"} +{"current_steps": 3314, "total_steps": 4048, "loss": 0.23390671610832214, "lr": 1.749685746912323e-06, "epoch": 1.637588039046089, "percentage": 81.87, "elapsed_time": "12:00:38", "remaining_time": "2:39:36"} +{"current_steps": 3315, "total_steps": 4048, "loss": 0.2760172188282013, "lr": 1.7450714071883079e-06, "epoch": 1.6380822933399233, "percentage": 81.89, 
"elapsed_time": "12:00:50", "remaining_time": "2:39:23"} +{"current_steps": 3316, "total_steps": 4048, "loss": 0.255672812461853, "lr": 1.7404625783366703e-06, "epoch": 1.6385765476337575, "percentage": 81.92, "elapsed_time": "12:01:03", "remaining_time": "2:39:10"} +{"current_steps": 3317, "total_steps": 4048, "loss": 0.26336947083473206, "lr": 1.7358592634342008e-06, "epoch": 1.6390708019275917, "percentage": 81.94, "elapsed_time": "12:01:16", "remaining_time": "2:38:57"} +{"current_steps": 3318, "total_steps": 4048, "loss": 0.2308199107646942, "lr": 1.7312614655540071e-06, "epoch": 1.639565056221426, "percentage": 81.97, "elapsed_time": "12:01:29", "remaining_time": "2:38:44"} +{"current_steps": 3319, "total_steps": 4048, "loss": 0.24762676656246185, "lr": 1.7266691877655129e-06, "epoch": 1.64005931051526, "percentage": 81.99, "elapsed_time": "12:01:42", "remaining_time": "2:38:31"} +{"current_steps": 3320, "total_steps": 4048, "loss": 0.2175157219171524, "lr": 1.7220824331344577e-06, "epoch": 1.6405535648090943, "percentage": 82.02, "elapsed_time": "12:01:56", "remaining_time": "2:38:18"} +{"current_steps": 3321, "total_steps": 4048, "loss": 0.24319039285182953, "lr": 1.7175012047228956e-06, "epoch": 1.6410478191029285, "percentage": 82.04, "elapsed_time": "12:02:08", "remaining_time": "2:38:05"} +{"current_steps": 3322, "total_steps": 4048, "loss": 0.21708521246910095, "lr": 1.7129255055891813e-06, "epoch": 1.6415420733967627, "percentage": 82.07, "elapsed_time": "12:02:22", "remaining_time": "2:37:52"} +{"current_steps": 3323, "total_steps": 4048, "loss": 0.28576910495758057, "lr": 1.7083553387879969e-06, "epoch": 1.642036327690597, "percentage": 82.09, "elapsed_time": "12:02:34", "remaining_time": "2:37:38"} +{"current_steps": 3324, "total_steps": 4048, "loss": 0.2664312720298767, "lr": 1.703790707370313e-06, "epoch": 1.6425305819844311, "percentage": 82.11, "elapsed_time": "12:02:47", "remaining_time": "2:37:25"} +{"current_steps": 3325, "total_steps": 
4048, "loss": 0.23930951952934265, "lr": 1.6992316143834142e-06, "epoch": 1.6430248362782651, "percentage": 82.14, "elapsed_time": "12:03:00", "remaining_time": "2:37:12"} +{"current_steps": 3326, "total_steps": 4048, "loss": 0.2741955518722534, "lr": 1.694678062870886e-06, "epoch": 1.6435190905720993, "percentage": 82.16, "elapsed_time": "12:03:13", "remaining_time": "2:36:59"} +{"current_steps": 3327, "total_steps": 4048, "loss": 0.25177690386772156, "lr": 1.6901300558726142e-06, "epoch": 1.6440133448659335, "percentage": 82.19, "elapsed_time": "12:03:26", "remaining_time": "2:36:46"} +{"current_steps": 3328, "total_steps": 4048, "loss": 0.26517611742019653, "lr": 1.6855875964247837e-06, "epoch": 1.6445075991597677, "percentage": 82.21, "elapsed_time": "12:03:39", "remaining_time": "2:36:33"} +{"current_steps": 3329, "total_steps": 4048, "loss": 0.2294573187828064, "lr": 1.6810506875598776e-06, "epoch": 1.6450018534536017, "percentage": 82.24, "elapsed_time": "12:03:52", "remaining_time": "2:36:20"} +{"current_steps": 3330, "total_steps": 4048, "loss": 0.23062998056411743, "lr": 1.6765193323066653e-06, "epoch": 1.645496107747436, "percentage": 82.26, "elapsed_time": "12:04:05", "remaining_time": "2:36:07"} +{"current_steps": 3331, "total_steps": 4048, "loss": 0.3047422468662262, "lr": 1.6719935336902205e-06, "epoch": 1.6459903620412701, "percentage": 82.29, "elapsed_time": "12:04:18", "remaining_time": "2:35:54"} +{"current_steps": 3332, "total_steps": 4048, "loss": 0.2715694308280945, "lr": 1.6674732947319017e-06, "epoch": 1.6464846163351043, "percentage": 82.31, "elapsed_time": "12:04:30", "remaining_time": "2:35:41"} +{"current_steps": 3333, "total_steps": 4048, "loss": 0.20359721779823303, "lr": 1.6629586184493519e-06, "epoch": 1.6469788706289386, "percentage": 82.34, "elapsed_time": "12:04:44", "remaining_time": "2:35:28"} +{"current_steps": 3334, "total_steps": 4048, "loss": 0.20083262026309967, "lr": 1.6584495078565045e-06, "epoch": 1.6474731249227728, 
"percentage": 82.36, "elapsed_time": "12:04:56", "remaining_time": "2:35:15"} +{"current_steps": 3335, "total_steps": 4048, "loss": 0.2274707555770874, "lr": 1.6539459659635848e-06, "epoch": 1.647967379216607, "percentage": 82.39, "elapsed_time": "12:05:10", "remaining_time": "2:35:02"} +{"current_steps": 3336, "total_steps": 4048, "loss": 0.2654137909412384, "lr": 1.6494479957770847e-06, "epoch": 1.6484616335104412, "percentage": 82.41, "elapsed_time": "12:05:22", "remaining_time": "2:34:49"} +{"current_steps": 3337, "total_steps": 4048, "loss": 0.24672716856002808, "lr": 1.644955600299788e-06, "epoch": 1.6489558878042754, "percentage": 82.44, "elapsed_time": "12:05:35", "remaining_time": "2:34:36"} +{"current_steps": 3338, "total_steps": 4048, "loss": 0.21563802659511566, "lr": 1.640468782530753e-06, "epoch": 1.6494501420981096, "percentage": 82.46, "elapsed_time": "12:05:48", "remaining_time": "2:34:22"} +{"current_steps": 3339, "total_steps": 4048, "loss": 0.22986169159412384, "lr": 1.6359875454653151e-06, "epoch": 1.6499443963919438, "percentage": 82.49, "elapsed_time": "12:06:01", "remaining_time": "2:34:09"} +{"current_steps": 3340, "total_steps": 4048, "loss": 0.22981731593608856, "lr": 1.6315118920950857e-06, "epoch": 1.650438650685778, "percentage": 82.51, "elapsed_time": "12:06:14", "remaining_time": "2:33:56"} +{"current_steps": 3341, "total_steps": 4048, "loss": 0.25922536849975586, "lr": 1.6270418254079478e-06, "epoch": 1.650932904979612, "percentage": 82.53, "elapsed_time": "12:06:27", "remaining_time": "2:33:43"} +{"current_steps": 3342, "total_steps": 4048, "loss": 0.23273468017578125, "lr": 1.6225773483880503e-06, "epoch": 1.6514271592734462, "percentage": 82.56, "elapsed_time": "12:06:40", "remaining_time": "2:33:30"} +{"current_steps": 3343, "total_steps": 4048, "loss": 0.22988896071910858, "lr": 1.6181184640158165e-06, "epoch": 1.6519214135672804, "percentage": 82.58, "elapsed_time": "12:06:53", "remaining_time": "2:33:17"} +{"current_steps": 
3344, "total_steps": 4048, "loss": 0.2628646790981293, "lr": 1.6136651752679333e-06, "epoch": 1.6524156678611144, "percentage": 82.61, "elapsed_time": "12:07:05", "remaining_time": "2:33:04"} +{"current_steps": 3345, "total_steps": 4048, "loss": 0.24670086801052094, "lr": 1.6092174851173526e-06, "epoch": 1.6529099221549486, "percentage": 82.63, "elapsed_time": "12:07:18", "remaining_time": "2:32:51"} +{"current_steps": 3346, "total_steps": 4048, "loss": 0.27845436334609985, "lr": 1.6047753965332902e-06, "epoch": 1.6534041764487828, "percentage": 82.66, "elapsed_time": "12:07:31", "remaining_time": "2:32:38"} +{"current_steps": 3347, "total_steps": 4048, "loss": 0.25297483801841736, "lr": 1.6003389124812185e-06, "epoch": 1.653898430742617, "percentage": 82.68, "elapsed_time": "12:07:44", "remaining_time": "2:32:25"} +{"current_steps": 3348, "total_steps": 4048, "loss": 0.18876859545707703, "lr": 1.595908035922873e-06, "epoch": 1.6543926850364512, "percentage": 82.71, "elapsed_time": "12:07:57", "remaining_time": "2:32:12"} +{"current_steps": 3349, "total_steps": 4048, "loss": 0.23852673172950745, "lr": 1.591482769816246e-06, "epoch": 1.6548869393302854, "percentage": 82.73, "elapsed_time": "12:08:10", "remaining_time": "2:31:59"} +{"current_steps": 3350, "total_steps": 4048, "loss": 0.2569701373577118, "lr": 1.587063117115576e-06, "epoch": 1.6553811936241196, "percentage": 82.76, "elapsed_time": "12:08:24", "remaining_time": "2:31:46"} +{"current_steps": 3351, "total_steps": 4048, "loss": 0.29305699467658997, "lr": 1.582649080771359e-06, "epoch": 1.6558754479179538, "percentage": 82.78, "elapsed_time": "12:08:36", "remaining_time": "2:31:32"} +{"current_steps": 3352, "total_steps": 4048, "loss": 0.28942832350730896, "lr": 1.5782406637303527e-06, "epoch": 1.656369702211788, "percentage": 82.81, "elapsed_time": "12:08:49", "remaining_time": "2:31:19"} +{"current_steps": 3353, "total_steps": 4048, "loss": 0.27491068840026855, "lr": 1.5738378689355439e-06, "epoch": 
1.6568639565056222, "percentage": 82.83, "elapsed_time": "12:09:02", "remaining_time": "2:31:06"} +{"current_steps": 3354, "total_steps": 4048, "loss": 0.26730844378471375, "lr": 1.569440699326179e-06, "epoch": 1.6573582107994564, "percentage": 82.86, "elapsed_time": "12:09:15", "remaining_time": "2:30:53"} +{"current_steps": 3355, "total_steps": 4048, "loss": 0.23610982298851013, "lr": 1.5650491578377458e-06, "epoch": 1.6578524650932907, "percentage": 82.88, "elapsed_time": "12:09:28", "remaining_time": "2:30:40"} +{"current_steps": 3356, "total_steps": 4048, "loss": 0.26817262172698975, "lr": 1.5606632474019734e-06, "epoch": 1.6583467193871246, "percentage": 82.91, "elapsed_time": "12:09:41", "remaining_time": "2:30:27"} +{"current_steps": 3357, "total_steps": 4048, "loss": 0.2403341382741928, "lr": 1.556282970946833e-06, "epoch": 1.6588409736809588, "percentage": 82.93, "elapsed_time": "12:09:54", "remaining_time": "2:30:14"} +{"current_steps": 3358, "total_steps": 4048, "loss": 0.24433058500289917, "lr": 1.5519083313965378e-06, "epoch": 1.659335227974793, "percentage": 82.95, "elapsed_time": "12:10:07", "remaining_time": "2:30:01"} +{"current_steps": 3359, "total_steps": 4048, "loss": 0.2526702582836151, "lr": 1.5475393316715282e-06, "epoch": 1.6598294822686273, "percentage": 82.98, "elapsed_time": "12:10:20", "remaining_time": "2:29:48"} +{"current_steps": 3360, "total_steps": 4048, "loss": 0.24032334983348846, "lr": 1.543175974688491e-06, "epoch": 1.6603237365624612, "percentage": 83.0, "elapsed_time": "12:10:33", "remaining_time": "2:29:35"} +{"current_steps": 3361, "total_steps": 4048, "loss": 0.27770349383354187, "lr": 1.5388182633603433e-06, "epoch": 1.6608179908562954, "percentage": 83.03, "elapsed_time": "12:10:46", "remaining_time": "2:29:22"} +{"current_steps": 3362, "total_steps": 4048, "loss": 0.26002752780914307, "lr": 1.534466200596224e-06, "epoch": 1.6613122451501297, "percentage": 83.05, "elapsed_time": "12:10:58", "remaining_time": "2:29:09"} 
+{"current_steps": 3363, "total_steps": 4048, "loss": 0.2707037329673767, "lr": 1.5301197893015129e-06, "epoch": 1.6618064994439639, "percentage": 83.08, "elapsed_time": "12:11:11", "remaining_time": "2:28:56"} +{"current_steps": 3364, "total_steps": 4048, "loss": 0.27249252796173096, "lr": 1.52577903237781e-06, "epoch": 1.662300753737798, "percentage": 83.1, "elapsed_time": "12:11:24", "remaining_time": "2:28:42"} +{"current_steps": 3365, "total_steps": 4048, "loss": 0.22495020925998688, "lr": 1.5214439327229425e-06, "epoch": 1.6627950080316323, "percentage": 83.13, "elapsed_time": "12:11:37", "remaining_time": "2:28:29"} +{"current_steps": 3366, "total_steps": 4048, "loss": 0.23561973869800568, "lr": 1.5171144932309622e-06, "epoch": 1.6632892623254665, "percentage": 83.15, "elapsed_time": "12:11:50", "remaining_time": "2:28:16"} +{"current_steps": 3367, "total_steps": 4048, "loss": 0.2689869701862335, "lr": 1.512790716792143e-06, "epoch": 1.6637835166193007, "percentage": 83.18, "elapsed_time": "12:12:03", "remaining_time": "2:28:03"} +{"current_steps": 3368, "total_steps": 4048, "loss": 0.22249455749988556, "lr": 1.5084726062929688e-06, "epoch": 1.664277770913135, "percentage": 83.2, "elapsed_time": "12:12:16", "remaining_time": "2:27:50"} +{"current_steps": 3369, "total_steps": 4048, "loss": 0.24586130678653717, "lr": 1.5041601646161585e-06, "epoch": 1.664772025206969, "percentage": 83.23, "elapsed_time": "12:12:29", "remaining_time": "2:27:37"} +{"current_steps": 3370, "total_steps": 4048, "loss": 0.2549409568309784, "lr": 1.499853394640629e-06, "epoch": 1.6652662795008033, "percentage": 83.25, "elapsed_time": "12:12:41", "remaining_time": "2:27:24"} +{"current_steps": 3371, "total_steps": 4048, "loss": 0.2517774999141693, "lr": 1.4955522992415206e-06, "epoch": 1.6657605337946373, "percentage": 83.28, "elapsed_time": "12:12:55", "remaining_time": "2:27:11"} +{"current_steps": 3372, "total_steps": 4048, "loss": 0.2627662420272827, "lr": 1.491256881290184e-06, 
"epoch": 1.6662547880884715, "percentage": 83.3, "elapsed_time": "12:13:07", "remaining_time": "2:26:58"} +{"current_steps": 3373, "total_steps": 4048, "loss": 0.25203272700309753, "lr": 1.4869671436541788e-06, "epoch": 1.6667490423823057, "percentage": 83.33, "elapsed_time": "12:13:20", "remaining_time": "2:26:45"} +{"current_steps": 3374, "total_steps": 4048, "loss": 0.2206164300441742, "lr": 1.482683089197271e-06, "epoch": 1.66724329667614, "percentage": 83.35, "elapsed_time": "12:13:33", "remaining_time": "2:26:32"} +{"current_steps": 3375, "total_steps": 4048, "loss": 0.2551203966140747, "lr": 1.4784047207794383e-06, "epoch": 1.667737550969974, "percentage": 83.37, "elapsed_time": "12:13:46", "remaining_time": "2:26:19"} +{"current_steps": 3376, "total_steps": 4048, "loss": 0.2592264711856842, "lr": 1.4741320412568505e-06, "epoch": 1.668231805263808, "percentage": 83.4, "elapsed_time": "12:13:59", "remaining_time": "2:26:06"} +{"current_steps": 3377, "total_steps": 4048, "loss": 0.25902658700942993, "lr": 1.4698650534818936e-06, "epoch": 1.6687260595576423, "percentage": 83.42, "elapsed_time": "12:14:12", "remaining_time": "2:25:53"} +{"current_steps": 3378, "total_steps": 4048, "loss": 0.2685459852218628, "lr": 1.4656037603031491e-06, "epoch": 1.6692203138514765, "percentage": 83.45, "elapsed_time": "12:14:25", "remaining_time": "2:25:40"} +{"current_steps": 3379, "total_steps": 4048, "loss": 0.21010839939117432, "lr": 1.4613481645653914e-06, "epoch": 1.6697145681453107, "percentage": 83.47, "elapsed_time": "12:14:37", "remaining_time": "2:25:26"} +{"current_steps": 3380, "total_steps": 4048, "loss": 0.23318082094192505, "lr": 1.4570982691095925e-06, "epoch": 1.670208822439145, "percentage": 83.5, "elapsed_time": "12:14:50", "remaining_time": "2:25:13"} +{"current_steps": 3381, "total_steps": 4048, "loss": 0.25045326352119446, "lr": 1.4528540767729315e-06, "epoch": 1.6707030767329791, "percentage": 83.52, "elapsed_time": "12:15:03", "remaining_time": 
"2:25:00"} +{"current_steps": 3382, "total_steps": 4048, "loss": 0.2436288446187973, "lr": 1.4486155903887623e-06, "epoch": 1.6711973310268133, "percentage": 83.55, "elapsed_time": "12:15:16", "remaining_time": "2:24:47"} +{"current_steps": 3383, "total_steps": 4048, "loss": 0.20454761385917664, "lr": 1.444382812786641e-06, "epoch": 1.6716915853206475, "percentage": 83.57, "elapsed_time": "12:15:28", "remaining_time": "2:24:34"} +{"current_steps": 3384, "total_steps": 4048, "loss": 0.24906963109970093, "lr": 1.4401557467923089e-06, "epoch": 1.6721858396144818, "percentage": 83.6, "elapsed_time": "12:15:41", "remaining_time": "2:24:21"} +{"current_steps": 3385, "total_steps": 4048, "loss": 0.2552015483379364, "lr": 1.435934395227695e-06, "epoch": 1.672680093908316, "percentage": 83.62, "elapsed_time": "12:15:54", "remaining_time": "2:24:08"} +{"current_steps": 3386, "total_steps": 4048, "loss": 0.2393915057182312, "lr": 1.4317187609109129e-06, "epoch": 1.6731743482021502, "percentage": 83.65, "elapsed_time": "12:16:07", "remaining_time": "2:23:55"} +{"current_steps": 3387, "total_steps": 4048, "loss": 0.2607477009296417, "lr": 1.4275088466562625e-06, "epoch": 1.6736686024959841, "percentage": 83.67, "elapsed_time": "12:16:20", "remaining_time": "2:23:42"} +{"current_steps": 3388, "total_steps": 4048, "loss": 0.23722632229328156, "lr": 1.423304655274218e-06, "epoch": 1.6741628567898184, "percentage": 83.7, "elapsed_time": "12:16:32", "remaining_time": "2:23:28"} +{"current_steps": 3389, "total_steps": 4048, "loss": 0.2614964246749878, "lr": 1.4191061895714398e-06, "epoch": 1.6746571110836526, "percentage": 83.72, "elapsed_time": "12:16:45", "remaining_time": "2:23:15"} +{"current_steps": 3390, "total_steps": 4048, "loss": 0.2727823555469513, "lr": 1.4149134523507634e-06, "epoch": 1.6751513653774868, "percentage": 83.75, "elapsed_time": "12:16:58", "remaining_time": "2:23:02"} +{"current_steps": 3391, "total_steps": 4048, "loss": 0.25176581740379333, "lr": 
1.4107264464112003e-06, "epoch": 1.6756456196713208, "percentage": 83.77, "elapsed_time": "12:17:12", "remaining_time": "2:22:49"} +{"current_steps": 3392, "total_steps": 4048, "loss": 0.21339070796966553, "lr": 1.4065451745479352e-06, "epoch": 1.676139873965155, "percentage": 83.79, "elapsed_time": "12:17:24", "remaining_time": "2:22:36"} +{"current_steps": 3393, "total_steps": 4048, "loss": 0.26540419459342957, "lr": 1.4023696395523267e-06, "epoch": 1.6766341282589892, "percentage": 83.82, "elapsed_time": "12:17:37", "remaining_time": "2:22:23"} +{"current_steps": 3394, "total_steps": 4048, "loss": 0.2621360421180725, "lr": 1.3981998442119017e-06, "epoch": 1.6771283825528234, "percentage": 83.84, "elapsed_time": "12:17:50", "remaining_time": "2:22:10"} +{"current_steps": 3395, "total_steps": 4048, "loss": 0.2578747570514679, "lr": 1.3940357913103576e-06, "epoch": 1.6776226368466576, "percentage": 83.87, "elapsed_time": "12:18:03", "remaining_time": "2:21:57"} +{"current_steps": 3396, "total_steps": 4048, "loss": 0.26105010509490967, "lr": 1.3898774836275531e-06, "epoch": 1.6781168911404918, "percentage": 83.89, "elapsed_time": "12:18:15", "remaining_time": "2:21:44"} +{"current_steps": 3397, "total_steps": 4048, "loss": 0.2221919298171997, "lr": 1.3857249239395143e-06, "epoch": 1.678611145434326, "percentage": 83.92, "elapsed_time": "12:18:28", "remaining_time": "2:21:31"} +{"current_steps": 3398, "total_steps": 4048, "loss": 0.2498932033777237, "lr": 1.3815781150184382e-06, "epoch": 1.6791053997281602, "percentage": 83.94, "elapsed_time": "12:18:41", "remaining_time": "2:21:18"} +{"current_steps": 3399, "total_steps": 4048, "loss": 0.29306796193122864, "lr": 1.377437059632668e-06, "epoch": 1.6795996540219944, "percentage": 83.97, "elapsed_time": "12:18:54", "remaining_time": "2:21:05"} +{"current_steps": 3400, "total_steps": 4048, "loss": 0.23804892599582672, "lr": 1.3733017605467158e-06, "epoch": 1.6800939083158286, "percentage": 83.99, "elapsed_time": 
"12:19:08", "remaining_time": "2:20:52"} +{"current_steps": 3401, "total_steps": 4048, "loss": 0.18528425693511963, "lr": 1.3691722205212465e-06, "epoch": 1.6805881626096628, "percentage": 84.02, "elapsed_time": "12:19:26", "remaining_time": "2:20:40"} +{"current_steps": 3402, "total_steps": 4048, "loss": 0.257534921169281, "lr": 1.365048442313085e-06, "epoch": 1.6810824169034968, "percentage": 84.04, "elapsed_time": "12:19:39", "remaining_time": "2:20:27"} +{"current_steps": 3403, "total_steps": 4048, "loss": 0.2519993782043457, "lr": 1.3609304286752034e-06, "epoch": 1.681576671197331, "percentage": 84.07, "elapsed_time": "12:19:51", "remaining_time": "2:20:13"} +{"current_steps": 3404, "total_steps": 4048, "loss": 0.27830445766448975, "lr": 1.3568181823567328e-06, "epoch": 1.6820709254911652, "percentage": 84.09, "elapsed_time": "12:20:04", "remaining_time": "2:20:00"} +{"current_steps": 3405, "total_steps": 4048, "loss": 0.22532883286476135, "lr": 1.3527117061029438e-06, "epoch": 1.6825651797849994, "percentage": 84.12, "elapsed_time": "12:20:16", "remaining_time": "2:19:47"} +{"current_steps": 3406, "total_steps": 4048, "loss": 0.23230011761188507, "lr": 1.3486110026552668e-06, "epoch": 1.6830594340788334, "percentage": 84.14, "elapsed_time": "12:20:29", "remaining_time": "2:19:34"} +{"current_steps": 3407, "total_steps": 4048, "loss": 0.24105653166770935, "lr": 1.3445160747512743e-06, "epoch": 1.6835536883726676, "percentage": 84.17, "elapsed_time": "12:20:42", "remaining_time": "2:19:21"} +{"current_steps": 3408, "total_steps": 4048, "loss": 0.2946394681930542, "lr": 1.340426925124676e-06, "epoch": 1.6840479426665018, "percentage": 84.19, "elapsed_time": "12:20:55", "remaining_time": "2:19:08"} +{"current_steps": 3409, "total_steps": 4048, "loss": 0.2682989239692688, "lr": 1.3363435565053319e-06, "epoch": 1.684542196960336, "percentage": 84.21, "elapsed_time": "12:21:07", "remaining_time": "2:18:55"} +{"current_steps": 3410, "total_steps": 4048, "loss": 
0.2219456285238266, "lr": 1.332265971619241e-06, "epoch": 1.6850364512541702, "percentage": 84.24, "elapsed_time": "12:21:20", "remaining_time": "2:18:42"} +{"current_steps": 3411, "total_steps": 4048, "loss": 0.22532151639461517, "lr": 1.3281941731885396e-06, "epoch": 1.6855307055480044, "percentage": 84.26, "elapsed_time": "12:21:33", "remaining_time": "2:18:29"} +{"current_steps": 3412, "total_steps": 4048, "loss": 0.24166807532310486, "lr": 1.324128163931504e-06, "epoch": 1.6860249598418386, "percentage": 84.29, "elapsed_time": "12:21:46", "remaining_time": "2:18:16"} +{"current_steps": 3413, "total_steps": 4048, "loss": 0.25514671206474304, "lr": 1.3200679465625453e-06, "epoch": 1.6865192141356729, "percentage": 84.31, "elapsed_time": "12:22:00", "remaining_time": "2:18:03"} +{"current_steps": 3414, "total_steps": 4048, "loss": 0.263123482465744, "lr": 1.3160135237922011e-06, "epoch": 1.687013468429507, "percentage": 84.34, "elapsed_time": "12:22:12", "remaining_time": "2:17:49"} +{"current_steps": 3415, "total_steps": 4048, "loss": 0.23763976991176605, "lr": 1.3119648983271527e-06, "epoch": 1.6875077227233413, "percentage": 84.36, "elapsed_time": "12:22:26", "remaining_time": "2:17:37"} +{"current_steps": 3416, "total_steps": 4048, "loss": 0.28645598888397217, "lr": 1.3079220728701991e-06, "epoch": 1.6880019770171755, "percentage": 84.39, "elapsed_time": "12:22:38", "remaining_time": "2:17:23"} +{"current_steps": 3417, "total_steps": 4048, "loss": 0.2269624024629593, "lr": 1.303885050120275e-06, "epoch": 1.6884962313110097, "percentage": 84.41, "elapsed_time": "12:22:51", "remaining_time": "2:17:10"} +{"current_steps": 3418, "total_steps": 4048, "loss": 0.23601466417312622, "lr": 1.2998538327724386e-06, "epoch": 1.6889904856048437, "percentage": 84.44, "elapsed_time": "12:23:04", "remaining_time": "2:16:57"} +{"current_steps": 3419, "total_steps": 4048, "loss": 0.2246169149875641, "lr": 1.2958284235178743e-06, "epoch": 1.6894847398986779, "percentage": 84.46, 
"elapsed_time": "12:23:17", "remaining_time": "2:16:44"} +{"current_steps": 3420, "total_steps": 4048, "loss": 0.26519715785980225, "lr": 1.2918088250438865e-06, "epoch": 1.689978994192512, "percentage": 84.49, "elapsed_time": "12:23:29", "remaining_time": "2:16:31"} +{"current_steps": 3421, "total_steps": 4048, "loss": 0.2590267062187195, "lr": 1.2877950400339046e-06, "epoch": 1.6904732484863463, "percentage": 84.51, "elapsed_time": "12:23:42", "remaining_time": "2:16:18"} +{"current_steps": 3422, "total_steps": 4048, "loss": 0.2535945773124695, "lr": 1.2837870711674672e-06, "epoch": 1.6909675027801803, "percentage": 84.54, "elapsed_time": "12:23:54", "remaining_time": "2:16:05"} +{"current_steps": 3423, "total_steps": 4048, "loss": 0.21907874941825867, "lr": 1.279784921120244e-06, "epoch": 1.6914617570740145, "percentage": 84.56, "elapsed_time": "12:24:07", "remaining_time": "2:15:52"} +{"current_steps": 3424, "total_steps": 4048, "loss": 0.23314553499221802, "lr": 1.2757885925640124e-06, "epoch": 1.6919560113678487, "percentage": 84.58, "elapsed_time": "12:24:20", "remaining_time": "2:15:39"} +{"current_steps": 3425, "total_steps": 4048, "loss": 0.2288433313369751, "lr": 1.2717980881666615e-06, "epoch": 1.6924502656616829, "percentage": 84.61, "elapsed_time": "12:24:33", "remaining_time": "2:15:25"} +{"current_steps": 3426, "total_steps": 4048, "loss": 0.2285449206829071, "lr": 1.2678134105921924e-06, "epoch": 1.692944519955517, "percentage": 84.63, "elapsed_time": "12:24:46", "remaining_time": "2:15:12"} +{"current_steps": 3427, "total_steps": 4048, "loss": 0.2898653447628021, "lr": 1.2638345625007287e-06, "epoch": 1.6934387742493513, "percentage": 84.66, "elapsed_time": "12:24:59", "remaining_time": "2:14:59"} +{"current_steps": 3428, "total_steps": 4048, "loss": 0.23574519157409668, "lr": 1.2598615465484831e-06, "epoch": 1.6939330285431855, "percentage": 84.68, "elapsed_time": "12:25:11", "remaining_time": "2:14:46"} +{"current_steps": 3429, "total_steps": 
4048, "loss": 0.23385417461395264, "lr": 1.2558943653877887e-06, "epoch": 1.6944272828370197, "percentage": 84.71, "elapsed_time": "12:25:24", "remaining_time": "2:14:33"} +{"current_steps": 3430, "total_steps": 4048, "loss": 0.2555482089519501, "lr": 1.2519330216670766e-06, "epoch": 1.694921537130854, "percentage": 84.73, "elapsed_time": "12:25:37", "remaining_time": "2:14:20"} +{"current_steps": 3431, "total_steps": 4048, "loss": 0.22221535444259644, "lr": 1.247977518030885e-06, "epoch": 1.6954157914246881, "percentage": 84.76, "elapsed_time": "12:25:49", "remaining_time": "2:14:07"} +{"current_steps": 3432, "total_steps": 4048, "loss": 0.21753090620040894, "lr": 1.2440278571198516e-06, "epoch": 1.6959100457185223, "percentage": 84.78, "elapsed_time": "12:26:02", "remaining_time": "2:13:54"} +{"current_steps": 3433, "total_steps": 4048, "loss": 0.2352944314479828, "lr": 1.240084041570716e-06, "epoch": 1.6964043000123563, "percentage": 84.81, "elapsed_time": "12:26:15", "remaining_time": "2:13:41"} +{"current_steps": 3434, "total_steps": 4048, "loss": 0.22581814229488373, "lr": 1.2361460740163045e-06, "epoch": 1.6968985543061905, "percentage": 84.83, "elapsed_time": "12:26:28", "remaining_time": "2:13:28"} +{"current_steps": 3435, "total_steps": 4048, "loss": 0.28703421354293823, "lr": 1.2322139570855596e-06, "epoch": 1.6973928086000247, "percentage": 84.86, "elapsed_time": "12:26:41", "remaining_time": "2:13:15"} +{"current_steps": 3436, "total_steps": 4048, "loss": 0.21528789401054382, "lr": 1.2282876934034972e-06, "epoch": 1.697887062893859, "percentage": 84.88, "elapsed_time": "12:26:54", "remaining_time": "2:13:02"} +{"current_steps": 3437, "total_steps": 4048, "loss": 0.2675422430038452, "lr": 1.2243672855912393e-06, "epoch": 1.698381317187693, "percentage": 84.91, "elapsed_time": "12:27:07", "remaining_time": "2:12:49"} +{"current_steps": 3438, "total_steps": 4048, "loss": 0.26681527495384216, "lr": 1.2204527362659913e-06, "epoch": 1.6988755714815271, 
"percentage": 84.93, "elapsed_time": "12:27:20", "remaining_time": "2:12:35"} +{"current_steps": 3439, "total_steps": 4048, "loss": 0.2436470091342926, "lr": 1.216544048041054e-06, "epoch": 1.6993698257753613, "percentage": 84.96, "elapsed_time": "12:27:33", "remaining_time": "2:12:22"} +{"current_steps": 3440, "total_steps": 4048, "loss": 0.25458425283432007, "lr": 1.212641223525809e-06, "epoch": 1.6998640800691955, "percentage": 84.98, "elapsed_time": "12:27:46", "remaining_time": "2:12:09"} +{"current_steps": 3441, "total_steps": 4048, "loss": 0.24890559911727905, "lr": 1.2087442653257286e-06, "epoch": 1.7003583343630297, "percentage": 85.0, "elapsed_time": "12:27:59", "remaining_time": "2:11:56"} +{"current_steps": 3442, "total_steps": 4048, "loss": 0.26031816005706787, "lr": 1.2048531760423642e-06, "epoch": 1.700852588656864, "percentage": 85.03, "elapsed_time": "12:28:12", "remaining_time": "2:11:43"} +{"current_steps": 3443, "total_steps": 4048, "loss": 0.22184975445270538, "lr": 1.200967958273349e-06, "epoch": 1.7013468429506982, "percentage": 85.05, "elapsed_time": "12:28:26", "remaining_time": "2:11:30"} +{"current_steps": 3444, "total_steps": 4048, "loss": 0.2670953571796417, "lr": 1.1970886146124073e-06, "epoch": 1.7018410972445324, "percentage": 85.08, "elapsed_time": "12:28:38", "remaining_time": "2:11:17"} +{"current_steps": 3445, "total_steps": 4048, "loss": 0.27950525283813477, "lr": 1.1932151476493247e-06, "epoch": 1.7023353515383666, "percentage": 85.1, "elapsed_time": "12:28:51", "remaining_time": "2:11:04"} +{"current_steps": 3446, "total_steps": 4048, "loss": 0.23257380723953247, "lr": 1.1893475599699766e-06, "epoch": 1.7028296058322008, "percentage": 85.13, "elapsed_time": "12:29:04", "remaining_time": "2:10:51"} +{"current_steps": 3447, "total_steps": 4048, "loss": 0.2586575746536255, "lr": 1.1854858541563086e-06, "epoch": 1.703323860126035, "percentage": 85.15, "elapsed_time": "12:29:18", "remaining_time": "2:10:38"} +{"current_steps": 
3448, "total_steps": 4048, "loss": 0.2677457928657532, "lr": 1.1816300327863406e-06, "epoch": 1.703818114419869, "percentage": 85.18, "elapsed_time": "12:29:30", "remaining_time": "2:10:25"} +{"current_steps": 3449, "total_steps": 4048, "loss": 0.29866284132003784, "lr": 1.1777800984341637e-06, "epoch": 1.7043123687137032, "percentage": 85.2, "elapsed_time": "12:29:44", "remaining_time": "2:10:12"} +{"current_steps": 3450, "total_steps": 4048, "loss": 0.27279675006866455, "lr": 1.1739360536699397e-06, "epoch": 1.7048066230075374, "percentage": 85.23, "elapsed_time": "12:29:56", "remaining_time": "2:09:59"} +{"current_steps": 3451, "total_steps": 4048, "loss": 0.25695672631263733, "lr": 1.1700979010598945e-06, "epoch": 1.7053008773013716, "percentage": 85.25, "elapsed_time": "12:30:09", "remaining_time": "2:09:46"} +{"current_steps": 3452, "total_steps": 4048, "loss": 0.22578787803649902, "lr": 1.1662656431663278e-06, "epoch": 1.7057951315952056, "percentage": 85.28, "elapsed_time": "12:30:22", "remaining_time": "2:09:33"} +{"current_steps": 3453, "total_steps": 4048, "loss": 0.1946491301059723, "lr": 1.1624392825476016e-06, "epoch": 1.7062893858890398, "percentage": 85.3, "elapsed_time": "12:30:35", "remaining_time": "2:09:20"} +{"current_steps": 3454, "total_steps": 4048, "loss": 0.2099667191505432, "lr": 1.158618821758134e-06, "epoch": 1.706783640182874, "percentage": 85.33, "elapsed_time": "12:30:48", "remaining_time": "2:09:07"} +{"current_steps": 3455, "total_steps": 4048, "loss": 0.22660428285598755, "lr": 1.1548042633484148e-06, "epoch": 1.7072778944767082, "percentage": 85.35, "elapsed_time": "12:31:01", "remaining_time": "2:08:54"} +{"current_steps": 3456, "total_steps": 4048, "loss": 0.27378255128860474, "lr": 1.1509956098649855e-06, "epoch": 1.7077721487705424, "percentage": 85.38, "elapsed_time": "12:31:14", "remaining_time": "2:08:41"} +{"current_steps": 3457, "total_steps": 4048, "loss": 0.2209164947271347, "lr": 1.1471928638504504e-06, "epoch": 
1.7082664030643766, "percentage": 85.4, "elapsed_time": "12:31:27", "remaining_time": "2:08:28"} +{"current_steps": 3458, "total_steps": 4048, "loss": 0.24310322105884552, "lr": 1.1433960278434687e-06, "epoch": 1.7087606573582108, "percentage": 85.42, "elapsed_time": "12:31:41", "remaining_time": "2:08:15"} +{"current_steps": 3459, "total_steps": 4048, "loss": 0.23209068179130554, "lr": 1.1396051043787526e-06, "epoch": 1.709254911652045, "percentage": 85.45, "elapsed_time": "12:31:53", "remaining_time": "2:08:02"} +{"current_steps": 3460, "total_steps": 4048, "loss": 0.2514454126358032, "lr": 1.1358200959870703e-06, "epoch": 1.7097491659458792, "percentage": 85.47, "elapsed_time": "12:32:07", "remaining_time": "2:07:49"} +{"current_steps": 3461, "total_steps": 4048, "loss": 0.2580721378326416, "lr": 1.132041005195239e-06, "epoch": 1.7102434202397134, "percentage": 85.5, "elapsed_time": "12:32:20", "remaining_time": "2:07:35"} +{"current_steps": 3462, "total_steps": 4048, "loss": 0.26388949155807495, "lr": 1.1282678345261234e-06, "epoch": 1.7107376745335476, "percentage": 85.52, "elapsed_time": "12:32:33", "remaining_time": "2:07:22"} +{"current_steps": 3463, "total_steps": 4048, "loss": 0.2194654643535614, "lr": 1.1245005864986402e-06, "epoch": 1.7112319288273818, "percentage": 85.55, "elapsed_time": "12:32:46", "remaining_time": "2:07:09"} +{"current_steps": 3464, "total_steps": 4048, "loss": 0.2048814296722412, "lr": 1.1207392636277502e-06, "epoch": 1.7117261831212158, "percentage": 85.57, "elapsed_time": "12:32:59", "remaining_time": "2:06:56"} +{"current_steps": 3465, "total_steps": 4048, "loss": 0.24165832996368408, "lr": 1.1169838684244584e-06, "epoch": 1.71222043741505, "percentage": 85.6, "elapsed_time": "12:33:12", "remaining_time": "2:06:43"} +{"current_steps": 3466, "total_steps": 4048, "loss": 0.2484482377767563, "lr": 1.1132344033958132e-06, "epoch": 1.7127146917088842, "percentage": 85.62, "elapsed_time": "12:33:24", "remaining_time": "2:06:30"} 
+{"current_steps": 3467, "total_steps": 4048, "loss": 0.2406741827726364, "lr": 1.1094908710449048e-06, "epoch": 1.7132089460027184, "percentage": 85.65, "elapsed_time": "12:33:38", "remaining_time": "2:06:17"} +{"current_steps": 3468, "total_steps": 4048, "loss": 0.2417721152305603, "lr": 1.1057532738708588e-06, "epoch": 1.7137032002965524, "percentage": 85.67, "elapsed_time": "12:33:50", "remaining_time": "2:06:04"} +{"current_steps": 3469, "total_steps": 4048, "loss": 0.26304543018341064, "lr": 1.1020216143688446e-06, "epoch": 1.7141974545903866, "percentage": 85.7, "elapsed_time": "12:34:04", "remaining_time": "2:05:51"} +{"current_steps": 3470, "total_steps": 4048, "loss": 0.30013689398765564, "lr": 1.098295895030066e-06, "epoch": 1.7146917088842208, "percentage": 85.72, "elapsed_time": "12:34:16", "remaining_time": "2:05:38"} +{"current_steps": 3471, "total_steps": 4048, "loss": 0.21451817452907562, "lr": 1.0945761183417569e-06, "epoch": 1.715185963178055, "percentage": 85.75, "elapsed_time": "12:34:30", "remaining_time": "2:05:25"} +{"current_steps": 3472, "total_steps": 4048, "loss": 0.235377699136734, "lr": 1.0908622867871854e-06, "epoch": 1.7156802174718893, "percentage": 85.77, "elapsed_time": "12:34:43", "remaining_time": "2:05:12"} +{"current_steps": 3473, "total_steps": 4048, "loss": 0.23560425639152527, "lr": 1.0871544028456594e-06, "epoch": 1.7161744717657235, "percentage": 85.8, "elapsed_time": "12:34:56", "remaining_time": "2:04:59"} +{"current_steps": 3474, "total_steps": 4048, "loss": 0.2431229054927826, "lr": 1.083452468992503e-06, "epoch": 1.7166687260595577, "percentage": 85.82, "elapsed_time": "12:35:09", "remaining_time": "2:04:46"} +{"current_steps": 3475, "total_steps": 4048, "loss": 0.211553692817688, "lr": 1.0797564876990762e-06, "epoch": 1.7171629803533919, "percentage": 85.84, "elapsed_time": "12:35:22", "remaining_time": "2:04:33"} +{"current_steps": 3476, "total_steps": 4048, "loss": 0.23565953969955444, "lr": 
1.0760664614327643e-06, "epoch": 1.717657234647226, "percentage": 85.87, "elapsed_time": "12:35:35", "remaining_time": "2:04:20"} +{"current_steps": 3477, "total_steps": 4048, "loss": 0.2052966058254242, "lr": 1.0723823926569744e-06, "epoch": 1.7181514889410603, "percentage": 85.89, "elapsed_time": "12:35:48", "remaining_time": "2:04:07"} +{"current_steps": 3478, "total_steps": 4048, "loss": 0.24831204116344452, "lr": 1.06870428383114e-06, "epoch": 1.7186457432348945, "percentage": 85.92, "elapsed_time": "12:36:02", "remaining_time": "2:03:54"} +{"current_steps": 3479, "total_steps": 4048, "loss": 0.24706462025642395, "lr": 1.0650321374107142e-06, "epoch": 1.7191399975287285, "percentage": 85.94, "elapsed_time": "12:36:14", "remaining_time": "2:03:41"} +{"current_steps": 3480, "total_steps": 4048, "loss": 0.20845818519592285, "lr": 1.0613659558471644e-06, "epoch": 1.7196342518225627, "percentage": 85.97, "elapsed_time": "12:36:27", "remaining_time": "2:03:28"} +{"current_steps": 3481, "total_steps": 4048, "loss": 0.21599797904491425, "lr": 1.0577057415879887e-06, "epoch": 1.720128506116397, "percentage": 85.99, "elapsed_time": "12:36:41", "remaining_time": "2:03:15"} +{"current_steps": 3482, "total_steps": 4048, "loss": 0.2381049394607544, "lr": 1.054051497076689e-06, "epoch": 1.720622760410231, "percentage": 86.02, "elapsed_time": "12:36:54", "remaining_time": "2:03:02"} +{"current_steps": 3483, "total_steps": 4048, "loss": 0.22402817010879517, "lr": 1.0504032247527874e-06, "epoch": 1.721117014704065, "percentage": 86.04, "elapsed_time": "12:37:07", "remaining_time": "2:02:49"} +{"current_steps": 3484, "total_steps": 4048, "loss": 0.24406251311302185, "lr": 1.0467609270518186e-06, "epoch": 1.7216112689978993, "percentage": 86.07, "elapsed_time": "12:37:20", "remaining_time": "2:02:36"} +{"current_steps": 3485, "total_steps": 4048, "loss": 0.24388936161994934, "lr": 1.0431246064053291e-06, "epoch": 1.7221055232917335, "percentage": 86.09, "elapsed_time": 
"12:37:33", "remaining_time": "2:02:23"} +{"current_steps": 3486, "total_steps": 4048, "loss": 0.26131671667099, "lr": 1.0394942652408735e-06, "epoch": 1.7225997775855677, "percentage": 86.12, "elapsed_time": "12:37:47", "remaining_time": "2:02:10"} +{"current_steps": 3487, "total_steps": 4048, "loss": 0.247392475605011, "lr": 1.0358699059820188e-06, "epoch": 1.723094031879402, "percentage": 86.14, "elapsed_time": "12:38:00", "remaining_time": "2:01:56"} +{"current_steps": 3488, "total_steps": 4048, "loss": 0.22713768482208252, "lr": 1.0322515310483316e-06, "epoch": 1.7235882861732361, "percentage": 86.17, "elapsed_time": "12:38:12", "remaining_time": "2:01:43"} +{"current_steps": 3489, "total_steps": 4048, "loss": 0.2544357180595398, "lr": 1.0286391428553854e-06, "epoch": 1.7240825404670703, "percentage": 86.19, "elapsed_time": "12:38:26", "remaining_time": "2:01:30"} +{"current_steps": 3490, "total_steps": 4048, "loss": 0.23186656832695007, "lr": 1.0250327438147678e-06, "epoch": 1.7245767947609045, "percentage": 86.22, "elapsed_time": "12:38:39", "remaining_time": "2:01:17"} +{"current_steps": 3491, "total_steps": 4048, "loss": 0.20387035608291626, "lr": 1.0214323363340506e-06, "epoch": 1.7250710490547387, "percentage": 86.24, "elapsed_time": "12:38:52", "remaining_time": "2:01:04"} +{"current_steps": 3492, "total_steps": 4048, "loss": 0.25391846895217896, "lr": 1.017837922816819e-06, "epoch": 1.725565303348573, "percentage": 86.26, "elapsed_time": "12:39:06", "remaining_time": "2:00:51"} +{"current_steps": 3493, "total_steps": 4048, "loss": 0.23214812576770782, "lr": 1.014249505662649e-06, "epoch": 1.7260595576424071, "percentage": 86.29, "elapsed_time": "12:39:19", "remaining_time": "2:00:38"} +{"current_steps": 3494, "total_steps": 4048, "loss": 0.31888365745544434, "lr": 1.0106670872671187e-06, "epoch": 1.7265538119362414, "percentage": 86.31, "elapsed_time": "12:39:32", "remaining_time": "2:00:25"} +{"current_steps": 3495, "total_steps": 4048, "loss": 
0.23372362554073334, "lr": 1.0070906700217998e-06, "epoch": 1.7270480662300753, "percentage": 86.34, "elapsed_time": "12:39:45", "remaining_time": "2:00:12"} +{"current_steps": 3496, "total_steps": 4048, "loss": 0.20082907378673553, "lr": 1.0035202563142577e-06, "epoch": 1.7275423205239095, "percentage": 86.36, "elapsed_time": "12:39:59", "remaining_time": "1:59:59"} +{"current_steps": 3497, "total_steps": 4048, "loss": 0.23895825445652008, "lr": 9.99955848528046e-07, "epoch": 1.7280365748177438, "percentage": 86.39, "elapsed_time": "12:40:12", "remaining_time": "1:59:46"} +{"current_steps": 3498, "total_steps": 4048, "loss": 0.30089694261550903, "lr": 9.963974490427153e-07, "epoch": 1.728530829111578, "percentage": 86.41, "elapsed_time": "12:40:25", "remaining_time": "1:59:33"} +{"current_steps": 3499, "total_steps": 4048, "loss": 0.28134891390800476, "lr": 9.928450602338046e-07, "epoch": 1.729025083405412, "percentage": 86.44, "elapsed_time": "12:40:38", "remaining_time": "1:59:20"} +{"current_steps": 3500, "total_steps": 4048, "loss": 0.1947125792503357, "lr": 9.892986844728325e-07, "epoch": 1.7295193376992462, "percentage": 86.46, "elapsed_time": "12:40:51", "remaining_time": "1:59:07"} +{"current_steps": 3501, "total_steps": 4048, "loss": 0.252549409866333, "lr": 9.857583241273116e-07, "epoch": 1.7300135919930804, "percentage": 86.49, "elapsed_time": "12:41:10", "remaining_time": "1:58:55"} +{"current_steps": 3502, "total_steps": 4048, "loss": 0.28061211109161377, "lr": 9.82223981560736e-07, "epoch": 1.7305078462869146, "percentage": 86.51, "elapsed_time": "12:41:23", "remaining_time": "1:58:42"} +{"current_steps": 3503, "total_steps": 4048, "loss": 0.2492327094078064, "lr": 9.786956591325813e-07, "epoch": 1.7310021005807488, "percentage": 86.54, "elapsed_time": "12:41:36", "remaining_time": "1:58:29"} +{"current_steps": 3504, "total_steps": 4048, "loss": 0.20470373332500458, "lr": 9.75173359198307e-07, "epoch": 1.731496354874583, "percentage": 86.56, 
"elapsed_time": "12:41:49", "remaining_time": "1:58:16"} +{"current_steps": 3505, "total_steps": 4048, "loss": 0.24190351366996765, "lr": 9.716570841093476e-07, "epoch": 1.7319906091684172, "percentage": 86.59, "elapsed_time": "12:42:02", "remaining_time": "1:58:03"} +{"current_steps": 3506, "total_steps": 4048, "loss": 0.28784725069999695, "lr": 9.681468362131209e-07, "epoch": 1.7324848634622514, "percentage": 86.61, "elapsed_time": "12:42:16", "remaining_time": "1:57:50"} +{"current_steps": 3507, "total_steps": 4048, "loss": 0.2676560878753662, "lr": 9.646426178530176e-07, "epoch": 1.7329791177560856, "percentage": 86.64, "elapsed_time": "12:42:28", "remaining_time": "1:57:37"} +{"current_steps": 3508, "total_steps": 4048, "loss": 0.2493928223848343, "lr": 9.611444313684027e-07, "epoch": 1.7334733720499198, "percentage": 86.66, "elapsed_time": "12:42:41", "remaining_time": "1:57:24"} +{"current_steps": 3509, "total_steps": 4048, "loss": 0.23272472620010376, "lr": 9.57652279094613e-07, "epoch": 1.733967626343754, "percentage": 86.68, "elapsed_time": "12:42:55", "remaining_time": "1:57:11"} +{"current_steps": 3510, "total_steps": 4048, "loss": 0.23245804011821747, "lr": 9.541661633629662e-07, "epoch": 1.734461880637588, "percentage": 86.71, "elapsed_time": "12:43:08", "remaining_time": "1:56:58"} +{"current_steps": 3511, "total_steps": 4048, "loss": 0.22367024421691895, "lr": 9.506860865007373e-07, "epoch": 1.7349561349314222, "percentage": 86.73, "elapsed_time": "12:43:21", "remaining_time": "1:56:45"} +{"current_steps": 3512, "total_steps": 4048, "loss": 0.22332677245140076, "lr": 9.472120508311788e-07, "epoch": 1.7354503892252564, "percentage": 86.76, "elapsed_time": "12:43:34", "remaining_time": "1:56:32"} +{"current_steps": 3513, "total_steps": 4048, "loss": 0.28051453828811646, "lr": 9.437440586735081e-07, "epoch": 1.7359446435190906, "percentage": 86.78, "elapsed_time": "12:43:47", "remaining_time": "1:56:19"} +{"current_steps": 3514, "total_steps": 4048, 
"loss": 0.24815741181373596, "lr": 9.402821123429017e-07, "epoch": 1.7364388978129246, "percentage": 86.81, "elapsed_time": "12:44:00", "remaining_time": "1:56:06"} +{"current_steps": 3515, "total_steps": 4048, "loss": 0.24077603220939636, "lr": 9.368262141505114e-07, "epoch": 1.7369331521067588, "percentage": 86.83, "elapsed_time": "12:44:13", "remaining_time": "1:55:52"} +{"current_steps": 3516, "total_steps": 4048, "loss": 0.24596062302589417, "lr": 9.333763664034457e-07, "epoch": 1.737427406400593, "percentage": 86.86, "elapsed_time": "12:44:25", "remaining_time": "1:55:39"} +{"current_steps": 3517, "total_steps": 4048, "loss": 0.22939634323120117, "lr": 9.299325714047702e-07, "epoch": 1.7379216606944272, "percentage": 86.88, "elapsed_time": "12:44:39", "remaining_time": "1:55:26"} +{"current_steps": 3518, "total_steps": 4048, "loss": 0.24870653450489044, "lr": 9.264948314535116e-07, "epoch": 1.7384159149882614, "percentage": 86.91, "elapsed_time": "12:44:51", "remaining_time": "1:55:13"} +{"current_steps": 3519, "total_steps": 4048, "loss": 0.24589623510837555, "lr": 9.23063148844664e-07, "epoch": 1.7389101692820956, "percentage": 86.93, "elapsed_time": "12:45:05", "remaining_time": "1:55:00"} +{"current_steps": 3520, "total_steps": 4048, "loss": 0.24228474497795105, "lr": 9.196375258691615e-07, "epoch": 1.7394044235759298, "percentage": 86.96, "elapsed_time": "12:45:17", "remaining_time": "1:54:47"} +{"current_steps": 3521, "total_steps": 4048, "loss": 0.24371150135993958, "lr": 9.162179648139047e-07, "epoch": 1.739898677869764, "percentage": 86.98, "elapsed_time": "12:45:31", "remaining_time": "1:54:34"} +{"current_steps": 3522, "total_steps": 4048, "loss": 0.24775750935077667, "lr": 9.128044679617432e-07, "epoch": 1.7403929321635982, "percentage": 87.01, "elapsed_time": "12:45:44", "remaining_time": "1:54:21"} +{"current_steps": 3523, "total_steps": 4048, "loss": 0.2893243432044983, "lr": 9.093970375914784e-07, "epoch": 1.7408871864574325, "percentage": 
87.03, "elapsed_time": "12:45:57", "remaining_time": "1:54:08"} +{"current_steps": 3524, "total_steps": 4048, "loss": 0.24014830589294434, "lr": 9.059956759778632e-07, "epoch": 1.7413814407512667, "percentage": 87.06, "elapsed_time": "12:46:10", "remaining_time": "1:53:55"} +{"current_steps": 3525, "total_steps": 4048, "loss": 0.21439003944396973, "lr": 9.026003853915977e-07, "epoch": 1.7418756950451009, "percentage": 87.08, "elapsed_time": "12:46:23", "remaining_time": "1:53:42"} +{"current_steps": 3526, "total_steps": 4048, "loss": 0.23376847803592682, "lr": 8.992111680993265e-07, "epoch": 1.7423699493389349, "percentage": 87.1, "elapsed_time": "12:46:36", "remaining_time": "1:53:29"} +{"current_steps": 3527, "total_steps": 4048, "loss": 0.244795560836792, "lr": 8.958280263636487e-07, "epoch": 1.742864203632769, "percentage": 87.13, "elapsed_time": "12:46:50", "remaining_time": "1:53:16"} +{"current_steps": 3528, "total_steps": 4048, "loss": 0.2513751685619354, "lr": 8.924509624430955e-07, "epoch": 1.7433584579266033, "percentage": 87.15, "elapsed_time": "12:47:02", "remaining_time": "1:53:03"} +{"current_steps": 3529, "total_steps": 4048, "loss": 0.2118893414735794, "lr": 8.890799785921478e-07, "epoch": 1.7438527122204373, "percentage": 87.18, "elapsed_time": "12:47:15", "remaining_time": "1:52:50"} +{"current_steps": 3530, "total_steps": 4048, "loss": 0.2834109365940094, "lr": 8.857150770612288e-07, "epoch": 1.7443469665142715, "percentage": 87.2, "elapsed_time": "12:47:28", "remaining_time": "1:52:37"} +{"current_steps": 3531, "total_steps": 4048, "loss": 0.2546151876449585, "lr": 8.823562600966962e-07, "epoch": 1.7448412208081057, "percentage": 87.23, "elapsed_time": "12:47:41", "remaining_time": "1:52:24"} +{"current_steps": 3532, "total_steps": 4048, "loss": 0.2654607594013214, "lr": 8.790035299408494e-07, "epoch": 1.7453354751019399, "percentage": 87.25, "elapsed_time": "12:47:54", "remaining_time": "1:52:11"} +{"current_steps": 3533, "total_steps": 4048, 
"loss": 0.2720295786857605, "lr": 8.756568888319239e-07, "epoch": 1.745829729395774, "percentage": 87.28, "elapsed_time": "12:48:07", "remaining_time": "1:51:58"} +{"current_steps": 3534, "total_steps": 4048, "loss": 0.22259725630283356, "lr": 8.723163390040856e-07, "epoch": 1.7463239836896083, "percentage": 87.3, "elapsed_time": "12:48:20", "remaining_time": "1:51:45"} +{"current_steps": 3535, "total_steps": 4048, "loss": 0.22918277978897095, "lr": 8.68981882687443e-07, "epoch": 1.7468182379834425, "percentage": 87.33, "elapsed_time": "12:48:33", "remaining_time": "1:51:32"} +{"current_steps": 3536, "total_steps": 4048, "loss": 0.24396009743213654, "lr": 8.656535221080297e-07, "epoch": 1.7473124922772767, "percentage": 87.35, "elapsed_time": "12:48:47", "remaining_time": "1:51:19"} +{"current_steps": 3537, "total_steps": 4048, "loss": 0.2370900958776474, "lr": 8.623312594878097e-07, "epoch": 1.747806746571111, "percentage": 87.38, "elapsed_time": "12:48:59", "remaining_time": "1:51:05"} +{"current_steps": 3538, "total_steps": 4048, "loss": 0.2785671055316925, "lr": 8.590150970446798e-07, "epoch": 1.748301000864945, "percentage": 87.4, "elapsed_time": "12:49:13", "remaining_time": "1:50:52"} +{"current_steps": 3539, "total_steps": 4048, "loss": 0.29365241527557373, "lr": 8.557050369924624e-07, "epoch": 1.7487952551587793, "percentage": 87.43, "elapsed_time": "12:49:25", "remaining_time": "1:50:39"} +{"current_steps": 3540, "total_steps": 4048, "loss": 0.24052876234054565, "lr": 8.524010815409068e-07, "epoch": 1.7492895094526135, "percentage": 87.45, "elapsed_time": "12:49:39", "remaining_time": "1:50:26"} +{"current_steps": 3541, "total_steps": 4048, "loss": 0.23938694596290588, "lr": 8.49103232895685e-07, "epoch": 1.7497837637464475, "percentage": 87.48, "elapsed_time": "12:49:51", "remaining_time": "1:50:13"} +{"current_steps": 3542, "total_steps": 4048, "loss": 0.2244144231081009, "lr": 8.458114932583961e-07, "epoch": 1.7502780180402817, "percentage": 87.5, 
"elapsed_time": "12:50:04", "remaining_time": "1:50:00"} +{"current_steps": 3543, "total_steps": 4048, "loss": 0.25028878450393677, "lr": 8.425258648265544e-07, "epoch": 1.750772272334116, "percentage": 87.52, "elapsed_time": "12:50:17", "remaining_time": "1:49:47"} +{"current_steps": 3544, "total_steps": 4048, "loss": 0.23135274648666382, "lr": 8.39246349793602e-07, "epoch": 1.7512665266279501, "percentage": 87.55, "elapsed_time": "12:50:30", "remaining_time": "1:49:34"} +{"current_steps": 3545, "total_steps": 4048, "loss": 0.23874548077583313, "lr": 8.359729503488967e-07, "epoch": 1.751760780921784, "percentage": 87.57, "elapsed_time": "12:50:43", "remaining_time": "1:49:21"} +{"current_steps": 3546, "total_steps": 4048, "loss": 0.2780659794807434, "lr": 8.327056686777102e-07, "epoch": 1.7522550352156183, "percentage": 87.6, "elapsed_time": "12:50:55", "remaining_time": "1:49:08"} +{"current_steps": 3547, "total_steps": 4048, "loss": 0.213335320353508, "lr": 8.294445069612356e-07, "epoch": 1.7527492895094525, "percentage": 87.62, "elapsed_time": "12:51:09", "remaining_time": "1:48:55"} +{"current_steps": 3548, "total_steps": 4048, "loss": 0.23284730315208435, "lr": 8.261894673765757e-07, "epoch": 1.7532435438032867, "percentage": 87.65, "elapsed_time": "12:51:22", "remaining_time": "1:48:42"} +{"current_steps": 3549, "total_steps": 4048, "loss": 0.25429633259773254, "lr": 8.229405520967504e-07, "epoch": 1.753737798097121, "percentage": 87.67, "elapsed_time": "12:51:35", "remaining_time": "1:48:29"} +{"current_steps": 3550, "total_steps": 4048, "loss": 0.2519379258155823, "lr": 8.196977632906877e-07, "epoch": 1.7542320523909551, "percentage": 87.7, "elapsed_time": "12:51:48", "remaining_time": "1:48:16"} +{"current_steps": 3551, "total_steps": 4048, "loss": 0.2510948181152344, "lr": 8.164611031232283e-07, "epoch": 1.7547263066847893, "percentage": 87.72, "elapsed_time": "12:52:02", "remaining_time": "1:48:03"} +{"current_steps": 3552, "total_steps": 4048, "loss": 
0.27415433526039124, "lr": 8.132305737551193e-07, "epoch": 1.7552205609786236, "percentage": 87.75, "elapsed_time": "12:52:15", "remaining_time": "1:47:50"} +{"current_steps": 3553, "total_steps": 4048, "loss": 0.26723912358283997, "lr": 8.100061773430179e-07, "epoch": 1.7557148152724578, "percentage": 87.77, "elapsed_time": "12:52:29", "remaining_time": "1:47:37"} +{"current_steps": 3554, "total_steps": 4048, "loss": 0.2710701823234558, "lr": 8.067879160394821e-07, "epoch": 1.756209069566292, "percentage": 87.8, "elapsed_time": "12:52:42", "remaining_time": "1:47:24"} +{"current_steps": 3555, "total_steps": 4048, "loss": 0.23247234523296356, "lr": 8.035757919929765e-07, "epoch": 1.7567033238601262, "percentage": 87.82, "elapsed_time": "12:52:55", "remaining_time": "1:47:11"} +{"current_steps": 3556, "total_steps": 4048, "loss": 0.2514559328556061, "lr": 8.003698073478749e-07, "epoch": 1.7571975781539602, "percentage": 87.85, "elapsed_time": "12:53:07", "remaining_time": "1:46:58"} +{"current_steps": 3557, "total_steps": 4048, "loss": 0.23549199104309082, "lr": 7.971699642444419e-07, "epoch": 1.7576918324477944, "percentage": 87.87, "elapsed_time": "12:53:21", "remaining_time": "1:46:45"} +{"current_steps": 3558, "total_steps": 4048, "loss": 0.24511446058750153, "lr": 7.939762648188476e-07, "epoch": 1.7581860867416286, "percentage": 87.9, "elapsed_time": "12:53:34", "remaining_time": "1:46:32"} +{"current_steps": 3559, "total_steps": 4048, "loss": 0.18705075979232788, "lr": 7.907887112031609e-07, "epoch": 1.7586803410354628, "percentage": 87.92, "elapsed_time": "12:53:47", "remaining_time": "1:46:19"} +{"current_steps": 3560, "total_steps": 4048, "loss": 0.24297048151493073, "lr": 7.876073055253474e-07, "epoch": 1.7591745953292968, "percentage": 87.94, "elapsed_time": "12:54:00", "remaining_time": "1:46:05"} +{"current_steps": 3561, "total_steps": 4048, "loss": 0.239119753241539, "lr": 7.844320499092683e-07, "epoch": 1.759668849623131, "percentage": 87.97, 
"elapsed_time": "12:54:13", "remaining_time": "1:45:52"} +{"current_steps": 3562, "total_steps": 4048, "loss": 0.2430122196674347, "lr": 7.81262946474679e-07, "epoch": 1.7601631039169652, "percentage": 87.99, "elapsed_time": "12:54:25", "remaining_time": "1:45:39"} +{"current_steps": 3563, "total_steps": 4048, "loss": 0.2785049378871918, "lr": 7.78099997337225e-07, "epoch": 1.7606573582107994, "percentage": 88.02, "elapsed_time": "12:54:38", "remaining_time": "1:45:26"} +{"current_steps": 3564, "total_steps": 4048, "loss": 0.2451494038105011, "lr": 7.749432046084471e-07, "epoch": 1.7611516125046336, "percentage": 88.04, "elapsed_time": "12:54:51", "remaining_time": "1:45:13"} +{"current_steps": 3565, "total_steps": 4048, "loss": 0.20071648061275482, "lr": 7.717925703957785e-07, "epoch": 1.7616458667984678, "percentage": 88.07, "elapsed_time": "12:55:04", "remaining_time": "1:45:00"} +{"current_steps": 3566, "total_steps": 4048, "loss": 0.22308245301246643, "lr": 7.686480968025333e-07, "epoch": 1.762140121092302, "percentage": 88.09, "elapsed_time": "12:55:17", "remaining_time": "1:44:47"} +{"current_steps": 3567, "total_steps": 4048, "loss": 0.26082009077072144, "lr": 7.655097859279192e-07, "epoch": 1.7626343753861362, "percentage": 88.12, "elapsed_time": "12:55:30", "remaining_time": "1:44:34"} +{"current_steps": 3568, "total_steps": 4048, "loss": 0.21026611328125, "lr": 7.623776398670268e-07, "epoch": 1.7631286296799704, "percentage": 88.14, "elapsed_time": "12:55:43", "remaining_time": "1:44:21"} +{"current_steps": 3569, "total_steps": 4048, "loss": 0.23878465592861176, "lr": 7.592516607108324e-07, "epoch": 1.7636228839738046, "percentage": 88.17, "elapsed_time": "12:55:56", "remaining_time": "1:44:08"} +{"current_steps": 3570, "total_steps": 4048, "loss": 0.30288150906562805, "lr": 7.561318505461956e-07, "epoch": 1.7641171382676388, "percentage": 88.19, "elapsed_time": "12:56:09", "remaining_time": "1:43:55"} +{"current_steps": 3571, "total_steps": 4048, 
"loss": 0.25749915838241577, "lr": 7.530182114558582e-07, "epoch": 1.764611392561473, "percentage": 88.22, "elapsed_time": "12:56:22", "remaining_time": "1:43:42"} +{"current_steps": 3572, "total_steps": 4048, "loss": 0.23799163103103638, "lr": 7.499107455184351e-07, "epoch": 1.765105646855307, "percentage": 88.24, "elapsed_time": "12:56:35", "remaining_time": "1:43:29"} +{"current_steps": 3573, "total_steps": 4048, "loss": 0.2626670002937317, "lr": 7.46809454808436e-07, "epoch": 1.7655999011491412, "percentage": 88.27, "elapsed_time": "12:56:48", "remaining_time": "1:43:16"} +{"current_steps": 3574, "total_steps": 4048, "loss": 0.23273026943206787, "lr": 7.437143413962299e-07, "epoch": 1.7660941554429754, "percentage": 88.29, "elapsed_time": "12:57:01", "remaining_time": "1:43:03"} +{"current_steps": 3575, "total_steps": 4048, "loss": 0.22592151165008545, "lr": 7.406254073480735e-07, "epoch": 1.7665884097368096, "percentage": 88.32, "elapsed_time": "12:57:14", "remaining_time": "1:42:50"} +{"current_steps": 3576, "total_steps": 4048, "loss": 0.2594859004020691, "lr": 7.375426547260944e-07, "epoch": 1.7670826640306436, "percentage": 88.34, "elapsed_time": "12:57:27", "remaining_time": "1:42:37"} +{"current_steps": 3577, "total_steps": 4048, "loss": 0.2161571979522705, "lr": 7.344660855882946e-07, "epoch": 1.7675769183244778, "percentage": 88.36, "elapsed_time": "12:57:40", "remaining_time": "1:42:24"} +{"current_steps": 3578, "total_steps": 4048, "loss": 0.23052990436553955, "lr": 7.313957019885487e-07, "epoch": 1.768071172618312, "percentage": 88.39, "elapsed_time": "12:57:53", "remaining_time": "1:42:10"} +{"current_steps": 3579, "total_steps": 4048, "loss": 0.2309163510799408, "lr": 7.283315059766005e-07, "epoch": 1.7685654269121462, "percentage": 88.41, "elapsed_time": "12:58:07", "remaining_time": "1:41:57"} +{"current_steps": 3580, "total_steps": 4048, "loss": 0.24543863534927368, "lr": 7.252734995980604e-07, "epoch": 1.7690596812059804, "percentage": 88.44, 
"elapsed_time": "12:58:19", "remaining_time": "1:41:44"} +{"current_steps": 3581, "total_steps": 4048, "loss": 0.27616050839424133, "lr": 7.22221684894413e-07, "epoch": 1.7695539354998147, "percentage": 88.46, "elapsed_time": "12:58:33", "remaining_time": "1:41:31"} +{"current_steps": 3582, "total_steps": 4048, "loss": 0.2247719019651413, "lr": 7.191760639030077e-07, "epoch": 1.7700481897936489, "percentage": 88.49, "elapsed_time": "12:58:45", "remaining_time": "1:41:18"} +{"current_steps": 3583, "total_steps": 4048, "loss": 0.28721702098846436, "lr": 7.161366386570545e-07, "epoch": 1.770542444087483, "percentage": 88.51, "elapsed_time": "12:58:58", "remaining_time": "1:41:05"} +{"current_steps": 3584, "total_steps": 4048, "loss": 0.24191290140151978, "lr": 7.131034111856294e-07, "epoch": 1.7710366983813173, "percentage": 88.54, "elapsed_time": "12:59:11", "remaining_time": "1:40:52"} +{"current_steps": 3585, "total_steps": 4048, "loss": 0.24049970507621765, "lr": 7.100763835136748e-07, "epoch": 1.7715309526751515, "percentage": 88.56, "elapsed_time": "12:59:24", "remaining_time": "1:40:39"} +{"current_steps": 3586, "total_steps": 4048, "loss": 0.255404531955719, "lr": 7.070555576619887e-07, "epoch": 1.7720252069689857, "percentage": 88.59, "elapsed_time": "12:59:37", "remaining_time": "1:40:26"} +{"current_steps": 3587, "total_steps": 4048, "loss": 0.23041129112243652, "lr": 7.040409356472333e-07, "epoch": 1.7725194612628197, "percentage": 88.61, "elapsed_time": "12:59:50", "remaining_time": "1:40:13"} +{"current_steps": 3588, "total_steps": 4048, "loss": 0.2589847147464752, "lr": 7.010325194819278e-07, "epoch": 1.7730137155566539, "percentage": 88.64, "elapsed_time": "13:00:03", "remaining_time": "1:40:00"} +{"current_steps": 3589, "total_steps": 4048, "loss": 0.2604563236236572, "lr": 6.980303111744424e-07, "epoch": 1.773507969850488, "percentage": 88.66, "elapsed_time": "13:00:16", "remaining_time": "1:39:47"} +{"current_steps": 3590, "total_steps": 4048, 
"loss": 0.26831385493278503, "lr": 6.950343127290138e-07, "epoch": 1.7740022241443223, "percentage": 88.69, "elapsed_time": "13:00:29", "remaining_time": "1:39:34"} +{"current_steps": 3591, "total_steps": 4048, "loss": 0.20475032925605774, "lr": 6.920445261457276e-07, "epoch": 1.7744964784381563, "percentage": 88.71, "elapsed_time": "13:00:42", "remaining_time": "1:39:21"} +{"current_steps": 3592, "total_steps": 4048, "loss": 0.32378682494163513, "lr": 6.890609534205206e-07, "epoch": 1.7749907327319905, "percentage": 88.74, "elapsed_time": "13:00:55", "remaining_time": "1:39:08"} +{"current_steps": 3593, "total_steps": 4048, "loss": 0.2526070177555084, "lr": 6.86083596545184e-07, "epoch": 1.7754849870258247, "percentage": 88.76, "elapsed_time": "13:01:08", "remaining_time": "1:38:55"} +{"current_steps": 3594, "total_steps": 4048, "loss": 0.2467537820339203, "lr": 6.831124575073578e-07, "epoch": 1.775979241319659, "percentage": 88.78, "elapsed_time": "13:01:21", "remaining_time": "1:38:42"} +{"current_steps": 3595, "total_steps": 4048, "loss": 0.2857215404510498, "lr": 6.801475382905332e-07, "epoch": 1.776473495613493, "percentage": 88.81, "elapsed_time": "13:01:33", "remaining_time": "1:38:29"} +{"current_steps": 3596, "total_steps": 4048, "loss": 0.23615087568759918, "lr": 6.771888408740479e-07, "epoch": 1.7769677499073273, "percentage": 88.83, "elapsed_time": "13:01:47", "remaining_time": "1:38:16"} +{"current_steps": 3597, "total_steps": 4048, "loss": 0.2613365054130554, "lr": 6.742363672330854e-07, "epoch": 1.7774620042011615, "percentage": 88.86, "elapsed_time": "13:01:59", "remaining_time": "1:38:02"} +{"current_steps": 3598, "total_steps": 4048, "loss": 0.2558417320251465, "lr": 6.712901193386756e-07, "epoch": 1.7779562584949957, "percentage": 88.88, "elapsed_time": "13:02:12", "remaining_time": "1:37:49"} +{"current_steps": 3599, "total_steps": 4048, "loss": 0.2683117091655731, "lr": 6.683500991576919e-07, "epoch": 1.77845051278883, "percentage": 88.91, 
"elapsed_time": "13:02:25", "remaining_time": "1:37:36"} +{"current_steps": 3600, "total_steps": 4048, "loss": 0.2546064555644989, "lr": 6.654163086528487e-07, "epoch": 1.7789447670826641, "percentage": 88.93, "elapsed_time": "13:02:38", "remaining_time": "1:37:23"} +{"current_steps": 3601, "total_steps": 4048, "loss": 0.2683906555175781, "lr": 6.624887497827004e-07, "epoch": 1.7794390213764983, "percentage": 88.96, "elapsed_time": "13:02:56", "remaining_time": "1:37:11"} +{"current_steps": 3602, "total_steps": 4048, "loss": 0.23260846734046936, "lr": 6.595674245016492e-07, "epoch": 1.7799332756703325, "percentage": 88.98, "elapsed_time": "13:03:09", "remaining_time": "1:36:58"} +{"current_steps": 3603, "total_steps": 4048, "loss": 0.22884608805179596, "lr": 6.566523347599252e-07, "epoch": 1.7804275299641665, "percentage": 89.01, "elapsed_time": "13:03:22", "remaining_time": "1:36:45"} +{"current_steps": 3604, "total_steps": 4048, "loss": 0.24236485362052917, "lr": 6.537434825036027e-07, "epoch": 1.7809217842580007, "percentage": 89.03, "elapsed_time": "13:03:35", "remaining_time": "1:36:32"} +{"current_steps": 3605, "total_steps": 4048, "loss": 0.29543957114219666, "lr": 6.508408696745893e-07, "epoch": 1.781416038551835, "percentage": 89.06, "elapsed_time": "13:03:48", "remaining_time": "1:36:19"} +{"current_steps": 3606, "total_steps": 4048, "loss": 0.24011383950710297, "lr": 6.479444982106276e-07, "epoch": 1.781910292845669, "percentage": 89.08, "elapsed_time": "13:04:01", "remaining_time": "1:36:05"} +{"current_steps": 3607, "total_steps": 4048, "loss": 0.248407244682312, "lr": 6.450543700452949e-07, "epoch": 1.7824045471395031, "percentage": 89.11, "elapsed_time": "13:04:14", "remaining_time": "1:35:52"} +{"current_steps": 3608, "total_steps": 4048, "loss": 0.2405746728181839, "lr": 6.421704871080004e-07, "epoch": 1.7828988014333373, "percentage": 89.13, "elapsed_time": "13:04:26", "remaining_time": "1:35:39"} +{"current_steps": 3609, "total_steps": 4048, 
"loss": 0.24601790308952332, "lr": 6.392928513239804e-07, "epoch": 1.7833930557271716, "percentage": 89.16, "elapsed_time": "13:04:40", "remaining_time": "1:35:26"} +{"current_steps": 3610, "total_steps": 4048, "loss": 0.20030242204666138, "lr": 6.36421464614303e-07, "epoch": 1.7838873100210058, "percentage": 89.18, "elapsed_time": "13:04:52", "remaining_time": "1:35:13"} +{"current_steps": 3611, "total_steps": 4048, "loss": 0.23858311772346497, "lr": 6.335563288958691e-07, "epoch": 1.78438156431484, "percentage": 89.2, "elapsed_time": "13:05:06", "remaining_time": "1:35:00"} +{"current_steps": 3612, "total_steps": 4048, "loss": 0.2330242097377777, "lr": 6.306974460813986e-07, "epoch": 1.7848758186086742, "percentage": 89.23, "elapsed_time": "13:05:18", "remaining_time": "1:34:47"} +{"current_steps": 3613, "total_steps": 4048, "loss": 0.25513261556625366, "lr": 6.278448180794416e-07, "epoch": 1.7853700729025084, "percentage": 89.25, "elapsed_time": "13:05:31", "remaining_time": "1:34:34"} +{"current_steps": 3614, "total_steps": 4048, "loss": 0.2298405021429062, "lr": 6.249984467943737e-07, "epoch": 1.7858643271963426, "percentage": 89.28, "elapsed_time": "13:05:44", "remaining_time": "1:34:21"} +{"current_steps": 3615, "total_steps": 4048, "loss": 0.22120623290538788, "lr": 6.221583341263893e-07, "epoch": 1.7863585814901768, "percentage": 89.3, "elapsed_time": "13:05:57", "remaining_time": "1:34:08"} +{"current_steps": 3616, "total_steps": 4048, "loss": 0.26976969838142395, "lr": 6.193244819715072e-07, "epoch": 1.786852835784011, "percentage": 89.33, "elapsed_time": "13:06:10", "remaining_time": "1:33:55"} +{"current_steps": 3617, "total_steps": 4048, "loss": 0.24354586005210876, "lr": 6.164968922215697e-07, "epoch": 1.7873470900778452, "percentage": 89.35, "elapsed_time": "13:06:23", "remaining_time": "1:33:42"} +{"current_steps": 3618, "total_steps": 4048, "loss": 0.2849498689174652, "lr": 6.136755667642302e-07, "epoch": 1.7878413443716792, "percentage": 89.38, 
"elapsed_time": "13:06:35", "remaining_time": "1:33:29"} +{"current_steps": 3619, "total_steps": 4048, "loss": 0.2431584596633911, "lr": 6.10860507482971e-07, "epoch": 1.7883355986655134, "percentage": 89.4, "elapsed_time": "13:06:48", "remaining_time": "1:33:16"} +{"current_steps": 3620, "total_steps": 4048, "loss": 0.2384781688451767, "lr": 6.080517162570809e-07, "epoch": 1.7888298529593476, "percentage": 89.43, "elapsed_time": "13:07:01", "remaining_time": "1:33:03"} +{"current_steps": 3621, "total_steps": 4048, "loss": 0.23782339692115784, "lr": 6.052491949616712e-07, "epoch": 1.7893241072531818, "percentage": 89.45, "elapsed_time": "13:07:14", "remaining_time": "1:32:49"} +{"current_steps": 3622, "total_steps": 4048, "loss": 0.23293447494506836, "lr": 6.024529454676631e-07, "epoch": 1.7898183615470158, "percentage": 89.48, "elapsed_time": "13:07:27", "remaining_time": "1:32:36"} +{"current_steps": 3623, "total_steps": 4048, "loss": 0.21202662587165833, "lr": 5.996629696417955e-07, "epoch": 1.79031261584085, "percentage": 89.5, "elapsed_time": "13:07:39", "remaining_time": "1:32:23"} +{"current_steps": 3624, "total_steps": 4048, "loss": 0.27971768379211426, "lr": 5.968792693466141e-07, "epoch": 1.7908068701346842, "percentage": 89.53, "elapsed_time": "13:07:52", "remaining_time": "1:32:10"} +{"current_steps": 3625, "total_steps": 4048, "loss": 0.2433638721704483, "lr": 5.94101846440478e-07, "epoch": 1.7913011244285184, "percentage": 89.55, "elapsed_time": "13:08:05", "remaining_time": "1:31:57"} +{"current_steps": 3626, "total_steps": 4048, "loss": 0.21812602877616882, "lr": 5.91330702777555e-07, "epoch": 1.7917953787223526, "percentage": 89.58, "elapsed_time": "13:08:18", "remaining_time": "1:31:44"} +{"current_steps": 3627, "total_steps": 4048, "loss": 0.2135028839111328, "lr": 5.88565840207822e-07, "epoch": 1.7922896330161868, "percentage": 89.6, "elapsed_time": "13:08:31", "remaining_time": "1:31:31"} +{"current_steps": 3628, "total_steps": 4048, "loss": 
0.23919226229190826, "lr": 5.858072605770626e-07, "epoch": 1.792783887310021, "percentage": 89.62, "elapsed_time": "13:08:44", "remaining_time": "1:31:18"} +{"current_steps": 3629, "total_steps": 4048, "loss": 0.2495008111000061, "lr": 5.830549657268614e-07, "epoch": 1.7932781416038552, "percentage": 89.65, "elapsed_time": "13:08:57", "remaining_time": "1:31:05"} +{"current_steps": 3630, "total_steps": 4048, "loss": 0.2531805634498596, "lr": 5.80308957494613e-07, "epoch": 1.7937723958976894, "percentage": 89.67, "elapsed_time": "13:09:10", "remaining_time": "1:30:52"} +{"current_steps": 3631, "total_steps": 4048, "loss": 0.22644619643688202, "lr": 5.775692377135156e-07, "epoch": 1.7942666501915236, "percentage": 89.7, "elapsed_time": "13:09:23", "remaining_time": "1:30:39"} +{"current_steps": 3632, "total_steps": 4048, "loss": 0.2264411598443985, "lr": 5.748358082125638e-07, "epoch": 1.7947609044853579, "percentage": 89.72, "elapsed_time": "13:09:36", "remaining_time": "1:30:26"} +{"current_steps": 3633, "total_steps": 4048, "loss": 0.2663921117782593, "lr": 5.721086708165568e-07, "epoch": 1.7952551587791918, "percentage": 89.75, "elapsed_time": "13:09:49", "remaining_time": "1:30:13"} +{"current_steps": 3634, "total_steps": 4048, "loss": 0.2398051619529724, "lr": 5.693878273460951e-07, "epoch": 1.795749413073026, "percentage": 89.77, "elapsed_time": "13:10:02", "remaining_time": "1:30:00"} +{"current_steps": 3635, "total_steps": 4048, "loss": 0.28781580924987793, "lr": 5.6667327961757e-07, "epoch": 1.7962436673668603, "percentage": 89.8, "elapsed_time": "13:10:15", "remaining_time": "1:29:47"} +{"current_steps": 3636, "total_steps": 4048, "loss": 0.2232055813074112, "lr": 5.639650294431787e-07, "epoch": 1.7967379216606945, "percentage": 89.82, "elapsed_time": "13:10:28", "remaining_time": "1:29:34"} +{"current_steps": 3637, "total_steps": 4048, "loss": 0.23214340209960938, "lr": 5.612630786309103e-07, "epoch": 1.7972321759545284, "percentage": 89.85, 
"elapsed_time": "13:10:41", "remaining_time": "1:29:21"} +{"current_steps": 3638, "total_steps": 4048, "loss": 0.21598659455776215, "lr": 5.585674289845467e-07, "epoch": 1.7977264302483627, "percentage": 89.87, "elapsed_time": "13:10:54", "remaining_time": "1:29:08"} +{"current_steps": 3639, "total_steps": 4048, "loss": 0.2760176956653595, "lr": 5.558780823036658e-07, "epoch": 1.7982206845421969, "percentage": 89.9, "elapsed_time": "13:11:07", "remaining_time": "1:28:55"} +{"current_steps": 3640, "total_steps": 4048, "loss": 0.2641429901123047, "lr": 5.531950403836373e-07, "epoch": 1.798714938836031, "percentage": 89.92, "elapsed_time": "13:11:19", "remaining_time": "1:28:41"} +{"current_steps": 3641, "total_steps": 4048, "loss": 0.2407502382993698, "lr": 5.505183050156204e-07, "epoch": 1.7992091931298653, "percentage": 89.95, "elapsed_time": "13:11:32", "remaining_time": "1:28:28"} +{"current_steps": 3642, "total_steps": 4048, "loss": 0.19910940527915955, "lr": 5.478478779865682e-07, "epoch": 1.7997034474236995, "percentage": 89.97, "elapsed_time": "13:11:45", "remaining_time": "1:28:15"} +{"current_steps": 3643, "total_steps": 4048, "loss": 0.2716234624385834, "lr": 5.451837610792166e-07, "epoch": 1.8001977017175337, "percentage": 90.0, "elapsed_time": "13:11:58", "remaining_time": "1:28:02"} +{"current_steps": 3644, "total_steps": 4048, "loss": 0.2784198224544525, "lr": 5.42525956072093e-07, "epoch": 1.800691956011368, "percentage": 90.02, "elapsed_time": "13:12:10", "remaining_time": "1:27:49"} +{"current_steps": 3645, "total_steps": 4048, "loss": 0.2277904599905014, "lr": 5.398744647395104e-07, "epoch": 1.801186210305202, "percentage": 90.04, "elapsed_time": "13:12:23", "remaining_time": "1:27:36"} +{"current_steps": 3646, "total_steps": 4048, "loss": 0.26788002252578735, "lr": 5.372292888515684e-07, "epoch": 1.8016804645990363, "percentage": 90.07, "elapsed_time": "13:12:36", "remaining_time": "1:27:23"} +{"current_steps": 3647, "total_steps": 4048, "loss": 
0.22452175617218018, "lr": 5.345904301741445e-07, "epoch": 1.8021747188928705, "percentage": 90.09, "elapsed_time": "13:12:49", "remaining_time": "1:27:10"} +{"current_steps": 3648, "total_steps": 4048, "loss": 0.2337179332971573, "lr": 5.319578904689071e-07, "epoch": 1.8026689731867047, "percentage": 90.12, "elapsed_time": "13:13:02", "remaining_time": "1:26:57"} +{"current_steps": 3649, "total_steps": 4048, "loss": 0.2614130973815918, "lr": 5.293316714932983e-07, "epoch": 1.8031632274805387, "percentage": 90.14, "elapsed_time": "13:13:15", "remaining_time": "1:26:44"} +{"current_steps": 3650, "total_steps": 4048, "loss": 0.2577320635318756, "lr": 5.267117750005468e-07, "epoch": 1.803657481774373, "percentage": 90.17, "elapsed_time": "13:13:28", "remaining_time": "1:26:31"} +{"current_steps": 3651, "total_steps": 4048, "loss": 0.2058672308921814, "lr": 5.24098202739658e-07, "epoch": 1.8041517360682071, "percentage": 90.19, "elapsed_time": "13:13:41", "remaining_time": "1:26:18"} +{"current_steps": 3652, "total_steps": 4048, "loss": 0.25223514437675476, "lr": 5.214909564554138e-07, "epoch": 1.8046459903620413, "percentage": 90.22, "elapsed_time": "13:13:54", "remaining_time": "1:26:05"} +{"current_steps": 3653, "total_steps": 4048, "loss": 0.25651872158050537, "lr": 5.188900378883765e-07, "epoch": 1.8051402446558753, "percentage": 90.24, "elapsed_time": "13:14:07", "remaining_time": "1:25:52"} +{"current_steps": 3654, "total_steps": 4048, "loss": 0.257855623960495, "lr": 5.162954487748828e-07, "epoch": 1.8056344989497095, "percentage": 90.27, "elapsed_time": "13:14:20", "remaining_time": "1:25:39"} +{"current_steps": 3655, "total_steps": 4048, "loss": 0.22942093014717102, "lr": 5.137071908470381e-07, "epoch": 1.8061287532435437, "percentage": 90.29, "elapsed_time": "13:14:33", "remaining_time": "1:25:26"} +{"current_steps": 3656, "total_steps": 4048, "loss": 0.25629153847694397, "lr": 5.111252658327326e-07, "epoch": 1.806623007537378, "percentage": 90.32, 
"elapsed_time": "13:14:47", "remaining_time": "1:25:13"} +{"current_steps": 3657, "total_steps": 4048, "loss": 0.23882299661636353, "lr": 5.085496754556207e-07, "epoch": 1.8071172618312121, "percentage": 90.34, "elapsed_time": "13:14:59", "remaining_time": "1:24:59"} +{"current_steps": 3658, "total_steps": 4048, "loss": 0.2323160469532013, "lr": 5.059804214351283e-07, "epoch": 1.8076115161250463, "percentage": 90.37, "elapsed_time": "13:15:12", "remaining_time": "1:24:46"} +{"current_steps": 3659, "total_steps": 4048, "loss": 0.2080869972705841, "lr": 5.034175054864531e-07, "epoch": 1.8081057704188805, "percentage": 90.39, "elapsed_time": "13:15:25", "remaining_time": "1:24:33"} +{"current_steps": 3660, "total_steps": 4048, "loss": 0.22439511120319366, "lr": 5.008609293205624e-07, "epoch": 1.8086000247127147, "percentage": 90.42, "elapsed_time": "13:15:38", "remaining_time": "1:24:20"} +{"current_steps": 3661, "total_steps": 4048, "loss": 0.2527809739112854, "lr": 4.983106946441885e-07, "epoch": 1.809094279006549, "percentage": 90.44, "elapsed_time": "13:15:51", "remaining_time": "1:24:07"} +{"current_steps": 3662, "total_steps": 4048, "loss": 0.2149294763803482, "lr": 4.957668031598328e-07, "epoch": 1.8095885333003832, "percentage": 90.46, "elapsed_time": "13:16:04", "remaining_time": "1:23:54"} +{"current_steps": 3663, "total_steps": 4048, "loss": 0.2471565306186676, "lr": 4.932292565657615e-07, "epoch": 1.8100827875942174, "percentage": 90.49, "elapsed_time": "13:16:17", "remaining_time": "1:23:41"} +{"current_steps": 3664, "total_steps": 4048, "loss": 0.25820282101631165, "lr": 4.906980565560004e-07, "epoch": 1.8105770418880514, "percentage": 90.51, "elapsed_time": "13:16:29", "remaining_time": "1:23:28"} +{"current_steps": 3665, "total_steps": 4048, "loss": 0.2815645933151245, "lr": 4.881732048203469e-07, "epoch": 1.8110712961818856, "percentage": 90.54, "elapsed_time": "13:16:42", "remaining_time": "1:23:15"} +{"current_steps": 3666, "total_steps": 4048, 
"loss": 0.23443330824375153, "lr": 4.856547030443559e-07, "epoch": 1.8115655504757198, "percentage": 90.56, "elapsed_time": "13:16:55", "remaining_time": "1:23:02"} +{"current_steps": 3667, "total_steps": 4048, "loss": 0.2452373206615448, "lr": 4.831425529093403e-07, "epoch": 1.812059804769554, "percentage": 90.59, "elapsed_time": "13:17:08", "remaining_time": "1:22:49"} +{"current_steps": 3668, "total_steps": 4048, "loss": 0.21815839409828186, "lr": 4.806367560923764e-07, "epoch": 1.812554059063388, "percentage": 90.61, "elapsed_time": "13:17:21", "remaining_time": "1:22:36"} +{"current_steps": 3669, "total_steps": 4048, "loss": 0.23436316847801208, "lr": 4.781373142663003e-07, "epoch": 1.8130483133572222, "percentage": 90.64, "elapsed_time": "13:17:34", "remaining_time": "1:22:23"} +{"current_steps": 3670, "total_steps": 4048, "loss": 0.18917132914066315, "lr": 4.75644229099701e-07, "epoch": 1.8135425676510564, "percentage": 90.66, "elapsed_time": "13:17:47", "remaining_time": "1:22:10"} +{"current_steps": 3671, "total_steps": 4048, "loss": 0.24570351839065552, "lr": 4.7315750225692905e-07, "epoch": 1.8140368219448906, "percentage": 90.69, "elapsed_time": "13:18:01", "remaining_time": "1:21:57"} +{"current_steps": 3672, "total_steps": 4048, "loss": 0.23367956280708313, "lr": 4.7067713539808543e-07, "epoch": 1.8145310762387248, "percentage": 90.71, "elapsed_time": "13:18:13", "remaining_time": "1:21:44"} +{"current_steps": 3673, "total_steps": 4048, "loss": 0.24563322961330414, "lr": 4.682031301790291e-07, "epoch": 1.815025330532559, "percentage": 90.74, "elapsed_time": "13:18:27", "remaining_time": "1:21:31"} +{"current_steps": 3674, "total_steps": 4048, "loss": 0.2425815761089325, "lr": 4.6573548825137204e-07, "epoch": 1.8155195848263932, "percentage": 90.76, "elapsed_time": "13:18:40", "remaining_time": "1:21:18"} +{"current_steps": 3675, "total_steps": 4048, "loss": 0.2173803597688675, "lr": 4.632742112624744e-07, "epoch": 1.8160138391202274, "percentage": 
90.79, "elapsed_time": "13:18:52", "remaining_time": "1:21:05"} +{"current_steps": 3676, "total_steps": 4048, "loss": 0.2665477395057678, "lr": 4.6081930085544734e-07, "epoch": 1.8165080934140616, "percentage": 90.81, "elapsed_time": "13:19:06", "remaining_time": "1:20:51"} +{"current_steps": 3677, "total_steps": 4048, "loss": 0.23834756016731262, "lr": 4.5837075866915994e-07, "epoch": 1.8170023477078958, "percentage": 90.83, "elapsed_time": "13:19:18", "remaining_time": "1:20:38"} +{"current_steps": 3678, "total_steps": 4048, "loss": 0.2479294240474701, "lr": 4.55928586338219e-07, "epoch": 1.81749660200173, "percentage": 90.86, "elapsed_time": "13:19:32", "remaining_time": "1:20:25"} +{"current_steps": 3679, "total_steps": 4048, "loss": 0.24136531352996826, "lr": 4.5349278549298716e-07, "epoch": 1.8179908562955642, "percentage": 90.88, "elapsed_time": "13:19:44", "remaining_time": "1:20:12"} +{"current_steps": 3680, "total_steps": 4048, "loss": 0.24397623538970947, "lr": 4.510633577595669e-07, "epoch": 1.8184851105893982, "percentage": 90.91, "elapsed_time": "13:19:57", "remaining_time": "1:19:59"} +{"current_steps": 3681, "total_steps": 4048, "loss": 0.27078694105148315, "lr": 4.48640304759812e-07, "epoch": 1.8189793648832324, "percentage": 90.93, "elapsed_time": "13:20:10", "remaining_time": "1:19:46"} +{"current_steps": 3682, "total_steps": 4048, "loss": 0.2544251084327698, "lr": 4.4622362811131745e-07, "epoch": 1.8194736191770666, "percentage": 90.96, "elapsed_time": "13:20:23", "remaining_time": "1:19:33"} +{"current_steps": 3683, "total_steps": 4048, "loss": 0.2528873682022095, "lr": 4.4381332942742384e-07, "epoch": 1.8199678734709008, "percentage": 90.98, "elapsed_time": "13:20:36", "remaining_time": "1:19:20"} +{"current_steps": 3684, "total_steps": 4048, "loss": 0.25487592816352844, "lr": 4.414094103172084e-07, "epoch": 1.8204621277647348, "percentage": 91.01, "elapsed_time": "13:20:49", "remaining_time": "1:19:07"} +{"current_steps": 3685, "total_steps": 
4048, "loss": 0.22061187028884888, "lr": 4.3901187238549414e-07, "epoch": 1.820956382058569, "percentage": 91.03, "elapsed_time": "13:21:02", "remaining_time": "1:18:54"} +{"current_steps": 3686, "total_steps": 4048, "loss": 0.2793615758419037, "lr": 4.366207172328452e-07, "epoch": 1.8214506363524032, "percentage": 91.06, "elapsed_time": "13:21:14", "remaining_time": "1:18:41"} +{"current_steps": 3687, "total_steps": 4048, "loss": 0.2323140949010849, "lr": 4.342359464555612e-07, "epoch": 1.8219448906462374, "percentage": 91.08, "elapsed_time": "13:21:27", "remaining_time": "1:18:28"} +{"current_steps": 3688, "total_steps": 4048, "loss": 0.2616409659385681, "lr": 4.3185756164568104e-07, "epoch": 1.8224391449400716, "percentage": 91.11, "elapsed_time": "13:21:40", "remaining_time": "1:18:15"} +{"current_steps": 3689, "total_steps": 4048, "loss": 0.203874871134758, "lr": 4.294855643909812e-07, "epoch": 1.8229333992339058, "percentage": 91.13, "elapsed_time": "13:21:53", "remaining_time": "1:18:02"} +{"current_steps": 3690, "total_steps": 4048, "loss": 0.2272878736257553, "lr": 4.271199562749717e-07, "epoch": 1.82342765352774, "percentage": 91.16, "elapsed_time": "13:22:05", "remaining_time": "1:17:49"} +{"current_steps": 3691, "total_steps": 4048, "loss": 0.23728047311306, "lr": 4.247607388769004e-07, "epoch": 1.8239219078215743, "percentage": 91.18, "elapsed_time": "13:22:18", "remaining_time": "1:17:36"} +{"current_steps": 3692, "total_steps": 4048, "loss": 0.2570911943912506, "lr": 4.2240791377174737e-07, "epoch": 1.8244161621154085, "percentage": 91.21, "elapsed_time": "13:22:31", "remaining_time": "1:17:22"} +{"current_steps": 3693, "total_steps": 4048, "loss": 0.24265727400779724, "lr": 4.200614825302207e-07, "epoch": 1.8249104164092427, "percentage": 91.23, "elapsed_time": "13:22:44", "remaining_time": "1:17:09"} +{"current_steps": 3694, "total_steps": 4048, "loss": 0.24822816252708435, "lr": 4.177214467187707e-07, "epoch": 1.8254046707030769, "percentage": 
91.25, "elapsed_time": "13:22:56", "remaining_time": "1:16:56"} +{"current_steps": 3695, "total_steps": 4048, "loss": 0.23382046818733215, "lr": 4.153878078995677e-07, "epoch": 1.8258989249969109, "percentage": 91.28, "elapsed_time": "13:23:10", "remaining_time": "1:16:43"} +{"current_steps": 3696, "total_steps": 4048, "loss": 0.27590304613113403, "lr": 4.130605676305166e-07, "epoch": 1.826393179290745, "percentage": 91.3, "elapsed_time": "13:23:22", "remaining_time": "1:16:30"} +{"current_steps": 3697, "total_steps": 4048, "loss": 0.25702038407325745, "lr": 4.1073972746525026e-07, "epoch": 1.8268874335845793, "percentage": 91.33, "elapsed_time": "13:23:36", "remaining_time": "1:16:17"} +{"current_steps": 3698, "total_steps": 4048, "loss": 0.28980135917663574, "lr": 4.0842528895312707e-07, "epoch": 1.8273816878784135, "percentage": 91.35, "elapsed_time": "13:23:48", "remaining_time": "1:16:04"} +{"current_steps": 3699, "total_steps": 4048, "loss": 0.22739271819591522, "lr": 4.0611725363923435e-07, "epoch": 1.8278759421722475, "percentage": 91.38, "elapsed_time": "13:24:01", "remaining_time": "1:15:51"} +{"current_steps": 3700, "total_steps": 4048, "loss": 0.26396334171295166, "lr": 4.038156230643853e-07, "epoch": 1.8283701964660817, "percentage": 91.4, "elapsed_time": "13:24:14", "remaining_time": "1:15:38"} +{"current_steps": 3701, "total_steps": 4048, "loss": 0.25548964738845825, "lr": 4.015203987651106e-07, "epoch": 1.8288644507599159, "percentage": 91.43, "elapsed_time": "13:24:33", "remaining_time": "1:15:26"} +{"current_steps": 3702, "total_steps": 4048, "loss": 0.22227105498313904, "lr": 3.992315822736725e-07, "epoch": 1.82935870505375, "percentage": 91.45, "elapsed_time": "13:24:46", "remaining_time": "1:15:13"} +{"current_steps": 3703, "total_steps": 4048, "loss": 0.30854254961013794, "lr": 3.969491751180543e-07, "epoch": 1.8298529593475843, "percentage": 91.48, "elapsed_time": "13:24:59", "remaining_time": "1:14:59"} +{"current_steps": 3704, 
"total_steps": 4048, "loss": 0.27471429109573364, "lr": 3.946731788219538e-07, "epoch": 1.8303472136414185, "percentage": 91.5, "elapsed_time": "13:25:12", "remaining_time": "1:14:46"} +{"current_steps": 3705, "total_steps": 4048, "loss": 0.2317768633365631, "lr": 3.924035949047955e-07, "epoch": 1.8308414679352527, "percentage": 91.53, "elapsed_time": "13:25:25", "remaining_time": "1:14:33"} +{"current_steps": 3706, "total_steps": 4048, "loss": 0.2450723946094513, "lr": 3.901404248817231e-07, "epoch": 1.831335722229087, "percentage": 91.55, "elapsed_time": "13:25:38", "remaining_time": "1:14:20"} +{"current_steps": 3707, "total_steps": 4048, "loss": 0.2428039014339447, "lr": 3.878836702635935e-07, "epoch": 1.8318299765229211, "percentage": 91.58, "elapsed_time": "13:25:51", "remaining_time": "1:14:07"} +{"current_steps": 3708, "total_steps": 4048, "loss": 0.27869629859924316, "lr": 3.856333325569861e-07, "epoch": 1.8323242308167553, "percentage": 91.6, "elapsed_time": "13:26:05", "remaining_time": "1:13:54"} +{"current_steps": 3709, "total_steps": 4048, "loss": 0.21661749482154846, "lr": 3.8338941326419353e-07, "epoch": 1.8328184851105895, "percentage": 91.63, "elapsed_time": "13:26:18", "remaining_time": "1:13:41"} +{"current_steps": 3710, "total_steps": 4048, "loss": 0.2655249834060669, "lr": 3.8115191388322206e-07, "epoch": 1.8333127394044237, "percentage": 91.65, "elapsed_time": "13:26:31", "remaining_time": "1:13:28"} +{"current_steps": 3711, "total_steps": 4048, "loss": 0.2281903475522995, "lr": 3.7892083590779784e-07, "epoch": 1.8338069936982577, "percentage": 91.67, "elapsed_time": "13:26:44", "remaining_time": "1:13:15"} +{"current_steps": 3712, "total_steps": 4048, "loss": 0.24545446038246155, "lr": 3.7669618082735504e-07, "epoch": 1.834301247992092, "percentage": 91.7, "elapsed_time": "13:26:57", "remaining_time": "1:13:02"} +{"current_steps": 3713, "total_steps": 4048, "loss": 0.24749556183815002, "lr": 3.7447795012704237e-07, "epoch": 
1.8347955022859261, "percentage": 91.72, "elapsed_time": "13:27:10", "remaining_time": "1:12:49"} +{"current_steps": 3714, "total_steps": 4048, "loss": 0.26234689354896545, "lr": 3.722661452877163e-07, "epoch": 1.8352897565797601, "percentage": 91.75, "elapsed_time": "13:27:23", "remaining_time": "1:12:36"} +{"current_steps": 3715, "total_steps": 4048, "loss": 0.21348389983177185, "lr": 3.700607677859491e-07, "epoch": 1.8357840108735943, "percentage": 91.77, "elapsed_time": "13:27:36", "remaining_time": "1:12:23"} +{"current_steps": 3716, "total_steps": 4048, "loss": 0.2527744770050049, "lr": 3.6786181909401864e-07, "epoch": 1.8362782651674285, "percentage": 91.8, "elapsed_time": "13:27:49", "remaining_time": "1:12:10"} +{"current_steps": 3717, "total_steps": 4048, "loss": 0.2175026535987854, "lr": 3.6566930067991056e-07, "epoch": 1.8367725194612627, "percentage": 91.82, "elapsed_time": "13:28:02", "remaining_time": "1:11:57"} +{"current_steps": 3718, "total_steps": 4048, "loss": 0.2847272753715515, "lr": 3.6348321400731967e-07, "epoch": 1.837266773755097, "percentage": 91.85, "elapsed_time": "13:28:15", "remaining_time": "1:11:44"} +{"current_steps": 3719, "total_steps": 4048, "loss": 0.2549072504043579, "lr": 3.613035605356463e-07, "epoch": 1.8377610280489312, "percentage": 91.87, "elapsed_time": "13:28:27", "remaining_time": "1:11:31"} +{"current_steps": 3720, "total_steps": 4048, "loss": 0.24534013867378235, "lr": 3.591303417199965e-07, "epoch": 1.8382552823427654, "percentage": 91.9, "elapsed_time": "13:28:41", "remaining_time": "1:11:18"} +{"current_steps": 3721, "total_steps": 4048, "loss": 0.25336408615112305, "lr": 3.5696355901117865e-07, "epoch": 1.8387495366365996, "percentage": 91.92, "elapsed_time": "13:28:54", "remaining_time": "1:11:05"} +{"current_steps": 3722, "total_steps": 4048, "loss": 0.2787632346153259, "lr": 3.548032138557056e-07, "epoch": 1.8392437909304338, "percentage": 91.95, "elapsed_time": "13:29:06", "remaining_time": "1:10:52"} 
+{"current_steps": 3723, "total_steps": 4048, "loss": 0.22364875674247742, "lr": 3.5264930769579595e-07, "epoch": 1.839738045224268, "percentage": 91.97, "elapsed_time": "13:29:20", "remaining_time": "1:10:39"} +{"current_steps": 3724, "total_steps": 4048, "loss": 0.2526230216026306, "lr": 3.5050184196936285e-07, "epoch": 1.8402322995181022, "percentage": 92.0, "elapsed_time": "13:29:32", "remaining_time": "1:10:25"} +{"current_steps": 3725, "total_steps": 4048, "loss": 0.2412932962179184, "lr": 3.483608181100262e-07, "epoch": 1.8407265538119364, "percentage": 92.02, "elapsed_time": "13:29:46", "remaining_time": "1:10:12"} +{"current_steps": 3726, "total_steps": 4048, "loss": 0.28693705797195435, "lr": 3.462262375471026e-07, "epoch": 1.8412208081057704, "percentage": 92.05, "elapsed_time": "13:29:59", "remaining_time": "1:09:59"} +{"current_steps": 3727, "total_steps": 4048, "loss": 0.2600281834602356, "lr": 3.4409810170560667e-07, "epoch": 1.8417150623996046, "percentage": 92.07, "elapsed_time": "13:30:12", "remaining_time": "1:09:46"} +{"current_steps": 3728, "total_steps": 4048, "loss": 0.24885150790214539, "lr": 3.4197641200625185e-07, "epoch": 1.8422093166934388, "percentage": 92.09, "elapsed_time": "13:30:24", "remaining_time": "1:09:33"} +{"current_steps": 3729, "total_steps": 4048, "loss": 0.27185115218162537, "lr": 3.398611698654497e-07, "epoch": 1.842703570987273, "percentage": 92.12, "elapsed_time": "13:30:37", "remaining_time": "1:09:20"} +{"current_steps": 3730, "total_steps": 4048, "loss": 0.2999323010444641, "lr": 3.377523766953006e-07, "epoch": 1.843197825281107, "percentage": 92.14, "elapsed_time": "13:30:50", "remaining_time": "1:09:07"} +{"current_steps": 3731, "total_steps": 4048, "loss": 0.22807806730270386, "lr": 3.356500339036106e-07, "epoch": 1.8436920795749412, "percentage": 92.17, "elapsed_time": "13:31:03", "remaining_time": "1:08:54"} +{"current_steps": 3732, "total_steps": 4048, "loss": 0.23006726801395416, "lr": 3.3355414289387155e-07, 
"epoch": 1.8441863338687754, "percentage": 92.19, "elapsed_time": "13:31:16", "remaining_time": "1:08:41"} +{"current_steps": 3733, "total_steps": 4048, "loss": 0.25261276960372925, "lr": 3.314647050652686e-07, "epoch": 1.8446805881626096, "percentage": 92.22, "elapsed_time": "13:31:29", "remaining_time": "1:08:28"} +{"current_steps": 3734, "total_steps": 4048, "loss": 0.2484148144721985, "lr": 3.293817218126827e-07, "epoch": 1.8451748424564438, "percentage": 92.24, "elapsed_time": "13:31:42", "remaining_time": "1:08:15"} +{"current_steps": 3735, "total_steps": 4048, "loss": 0.2472834438085556, "lr": 3.273051945266836e-07, "epoch": 1.845669096750278, "percentage": 92.27, "elapsed_time": "13:31:56", "remaining_time": "1:08:02"} +{"current_steps": 3736, "total_steps": 4048, "loss": 0.20510706305503845, "lr": 3.2523512459352923e-07, "epoch": 1.8461633510441122, "percentage": 92.29, "elapsed_time": "13:32:08", "remaining_time": "1:07:49"} +{"current_steps": 3737, "total_steps": 4048, "loss": 0.2331993281841278, "lr": 3.231715133951707e-07, "epoch": 1.8466576053379464, "percentage": 92.32, "elapsed_time": "13:32:21", "remaining_time": "1:07:36"} +{"current_steps": 3738, "total_steps": 4048, "loss": 0.2704228162765503, "lr": 3.211143623092461e-07, "epoch": 1.8471518596317806, "percentage": 92.34, "elapsed_time": "13:32:34", "remaining_time": "1:07:23"} +{"current_steps": 3739, "total_steps": 4048, "loss": 0.2514714002609253, "lr": 3.190636727090768e-07, "epoch": 1.8476461139256148, "percentage": 92.37, "elapsed_time": "13:32:47", "remaining_time": "1:07:10"} +{"current_steps": 3740, "total_steps": 4048, "loss": 0.2396089732646942, "lr": 3.170194459636777e-07, "epoch": 1.848140368219449, "percentage": 92.39, "elapsed_time": "13:33:00", "remaining_time": "1:06:57"} +{"current_steps": 3741, "total_steps": 4048, "loss": 0.266484797000885, "lr": 3.149816834377428e-07, "epoch": 1.848634622513283, "percentage": 92.42, "elapsed_time": "13:33:13", "remaining_time": "1:06:44"} 
+{"current_steps": 3742, "total_steps": 4048, "loss": 0.24549749493598938, "lr": 3.129503864916539e-07, "epoch": 1.8491288768071172, "percentage": 92.44, "elapsed_time": "13:33:26", "remaining_time": "1:06:31"} +{"current_steps": 3743, "total_steps": 4048, "loss": 0.2659090757369995, "lr": 3.1092555648147615e-07, "epoch": 1.8496231311009514, "percentage": 92.47, "elapsed_time": "13:33:39", "remaining_time": "1:06:18"} +{"current_steps": 3744, "total_steps": 4048, "loss": 0.2756732702255249, "lr": 3.0890719475895615e-07, "epoch": 1.8501173853947857, "percentage": 92.49, "elapsed_time": "13:33:52", "remaining_time": "1:06:05"} +{"current_steps": 3745, "total_steps": 4048, "loss": 0.2568710148334503, "lr": 3.068953026715238e-07, "epoch": 1.8506116396886196, "percentage": 92.51, "elapsed_time": "13:34:05", "remaining_time": "1:05:51"} +{"current_steps": 3746, "total_steps": 4048, "loss": 0.2255566120147705, "lr": 3.048898815622914e-07, "epoch": 1.8511058939824538, "percentage": 92.54, "elapsed_time": "13:34:18", "remaining_time": "1:05:38"} +{"current_steps": 3747, "total_steps": 4048, "loss": 0.2083941102027893, "lr": 3.028909327700458e-07, "epoch": 1.851600148276288, "percentage": 92.56, "elapsed_time": "13:34:31", "remaining_time": "1:05:25"} +{"current_steps": 3748, "total_steps": 4048, "loss": 0.20739290118217468, "lr": 3.0089845762926063e-07, "epoch": 1.8520944025701223, "percentage": 92.59, "elapsed_time": "13:34:44", "remaining_time": "1:05:12"} +{"current_steps": 3749, "total_steps": 4048, "loss": 0.21835210919380188, "lr": 2.989124574700819e-07, "epoch": 1.8525886568639565, "percentage": 92.61, "elapsed_time": "13:34:57", "remaining_time": "1:04:59"} +{"current_steps": 3750, "total_steps": 4048, "loss": 0.2170596569776535, "lr": 2.969329336183335e-07, "epoch": 1.8530829111577907, "percentage": 92.64, "elapsed_time": "13:35:10", "remaining_time": "1:04:46"} +{"current_steps": 3751, "total_steps": 4048, "loss": 0.23584111034870148, "lr": 2.949598873955184e-07, 
"epoch": 1.8535771654516249, "percentage": 92.66, "elapsed_time": "13:35:23", "remaining_time": "1:04:33"} +{"current_steps": 3752, "total_steps": 4048, "loss": 0.2690342664718628, "lr": 2.9299332011881623e-07, "epoch": 1.854071419745459, "percentage": 92.69, "elapsed_time": "13:35:36", "remaining_time": "1:04:20"} +{"current_steps": 3753, "total_steps": 4048, "loss": 0.2499091923236847, "lr": 2.9103323310107566e-07, "epoch": 1.8545656740392933, "percentage": 92.71, "elapsed_time": "13:35:49", "remaining_time": "1:04:07"} +{"current_steps": 3754, "total_steps": 4048, "loss": 0.23112377524375916, "lr": 2.8907962765082567e-07, "epoch": 1.8550599283331275, "percentage": 92.74, "elapsed_time": "13:36:02", "remaining_time": "1:03:54"} +{"current_steps": 3755, "total_steps": 4048, "loss": 0.25203657150268555, "lr": 2.8713250507226285e-07, "epoch": 1.8555541826269617, "percentage": 92.76, "elapsed_time": "13:36:15", "remaining_time": "1:03:41"} +{"current_steps": 3756, "total_steps": 4048, "loss": 0.2468508780002594, "lr": 2.8519186666526086e-07, "epoch": 1.856048436920796, "percentage": 92.79, "elapsed_time": "13:36:28", "remaining_time": "1:03:28"} +{"current_steps": 3757, "total_steps": 4048, "loss": 0.22745928168296814, "lr": 2.8325771372536e-07, "epoch": 1.85654269121463, "percentage": 92.81, "elapsed_time": "13:36:41", "remaining_time": "1:03:15"} +{"current_steps": 3758, "total_steps": 4048, "loss": 0.23090660572052002, "lr": 2.8133004754377525e-07, "epoch": 1.857036945508464, "percentage": 92.84, "elapsed_time": "13:36:54", "remaining_time": "1:03:02"} +{"current_steps": 3759, "total_steps": 4048, "loss": 0.27513352036476135, "lr": 2.7940886940738707e-07, "epoch": 1.8575311998022983, "percentage": 92.86, "elapsed_time": "13:37:08", "remaining_time": "1:02:49"} +{"current_steps": 3760, "total_steps": 4048, "loss": 0.25791019201278687, "lr": 2.774941805987474e-07, "epoch": 1.8580254540961325, "percentage": 92.89, "elapsed_time": "13:37:20", "remaining_time": 
"1:02:36"} +{"current_steps": 3761, "total_steps": 4048, "loss": 0.2703961730003357, "lr": 2.75585982396076e-07, "epoch": 1.8585197083899665, "percentage": 92.91, "elapsed_time": "13:37:33", "remaining_time": "1:02:23"} +{"current_steps": 3762, "total_steps": 4048, "loss": 0.2557608485221863, "lr": 2.736842760732561e-07, "epoch": 1.8590139626838007, "percentage": 92.93, "elapsed_time": "13:37:46", "remaining_time": "1:02:10"} +{"current_steps": 3763, "total_steps": 4048, "loss": 0.26276740431785583, "lr": 2.717890628998421e-07, "epoch": 1.859508216977635, "percentage": 92.96, "elapsed_time": "13:37:59", "remaining_time": "1:01:57"} +{"current_steps": 3764, "total_steps": 4048, "loss": 0.3033446967601776, "lr": 2.699003441410508e-07, "epoch": 1.8600024712714691, "percentage": 92.98, "elapsed_time": "13:38:12", "remaining_time": "1:01:44"} +{"current_steps": 3765, "total_steps": 4048, "loss": 0.2513597905635834, "lr": 2.680181210577637e-07, "epoch": 1.8604967255653033, "percentage": 93.01, "elapsed_time": "13:38:25", "remaining_time": "1:01:31"} +{"current_steps": 3766, "total_steps": 4048, "loss": 0.22935059666633606, "lr": 2.661423949065267e-07, "epoch": 1.8609909798591375, "percentage": 93.03, "elapsed_time": "13:38:38", "remaining_time": "1:01:18"} +{"current_steps": 3767, "total_steps": 4048, "loss": 0.2585369348526001, "lr": 2.6427316693954596e-07, "epoch": 1.8614852341529717, "percentage": 93.06, "elapsed_time": "13:38:51", "remaining_time": "1:01:05"} +{"current_steps": 3768, "total_steps": 4048, "loss": 0.25701645016670227, "lr": 2.6241043840469104e-07, "epoch": 1.861979488446806, "percentage": 93.08, "elapsed_time": "13:39:04", "remaining_time": "1:00:51"} +{"current_steps": 3769, "total_steps": 4048, "loss": 0.24622182548046112, "lr": 2.605542105454961e-07, "epoch": 1.8624737427406401, "percentage": 93.11, "elapsed_time": "13:39:17", "remaining_time": "1:00:38"} +{"current_steps": 3770, "total_steps": 4048, "loss": 0.2650758624076843, "lr": 
2.5870448460114994e-07, "epoch": 1.8629679970344744, "percentage": 93.13, "elapsed_time": "13:39:30", "remaining_time": "1:00:25"} +{"current_steps": 3771, "total_steps": 4048, "loss": 0.2364269644021988, "lr": 2.568612618065036e-07, "epoch": 1.8634622513283086, "percentage": 93.16, "elapsed_time": "13:39:43", "remaining_time": "1:00:12"} +{"current_steps": 3772, "total_steps": 4048, "loss": 0.23226915299892426, "lr": 2.5502454339206617e-07, "epoch": 1.8639565056221425, "percentage": 93.18, "elapsed_time": "13:39:56", "remaining_time": "0:59:59"} +{"current_steps": 3773, "total_steps": 4048, "loss": 0.23077306151390076, "lr": 2.5319433058400565e-07, "epoch": 1.8644507599159768, "percentage": 93.21, "elapsed_time": "13:40:09", "remaining_time": "0:59:46"} +{"current_steps": 3774, "total_steps": 4048, "loss": 0.23707103729248047, "lr": 2.5137062460414476e-07, "epoch": 1.864945014209811, "percentage": 93.23, "elapsed_time": "13:40:22", "remaining_time": "0:59:33"} +{"current_steps": 3775, "total_steps": 4048, "loss": 0.268571138381958, "lr": 2.4955342666996505e-07, "epoch": 1.8654392685036452, "percentage": 93.26, "elapsed_time": "13:40:35", "remaining_time": "0:59:20"} +{"current_steps": 3776, "total_steps": 4048, "loss": 0.21469517052173615, "lr": 2.4774273799459847e-07, "epoch": 1.8659335227974791, "percentage": 93.28, "elapsed_time": "13:40:48", "remaining_time": "0:59:07"} +{"current_steps": 3777, "total_steps": 4048, "loss": 0.2513999938964844, "lr": 2.45938559786838e-07, "epoch": 1.8664277770913134, "percentage": 93.31, "elapsed_time": "13:41:01", "remaining_time": "0:58:54"} +{"current_steps": 3778, "total_steps": 4048, "loss": 0.23660680651664734, "lr": 2.44140893251128e-07, "epoch": 1.8669220313851476, "percentage": 93.33, "elapsed_time": "13:41:14", "remaining_time": "0:58:41"} +{"current_steps": 3779, "total_steps": 4048, "loss": 0.24594557285308838, "lr": 2.423497395875618e-07, "epoch": 1.8674162856789818, "percentage": 93.35, "elapsed_time": "13:41:27", 
"remaining_time": "0:58:28"} +{"current_steps": 3780, "total_steps": 4048, "loss": 0.2725435793399811, "lr": 2.405650999918896e-07, "epoch": 1.867910539972816, "percentage": 93.38, "elapsed_time": "13:41:41", "remaining_time": "0:58:15"} +{"current_steps": 3781, "total_steps": 4048, "loss": 0.25718316435813904, "lr": 2.3878697565551167e-07, "epoch": 1.8684047942666502, "percentage": 93.4, "elapsed_time": "13:41:54", "remaining_time": "0:58:02"} +{"current_steps": 3782, "total_steps": 4048, "loss": 0.2546181082725525, "lr": 2.3701536776547851e-07, "epoch": 1.8688990485604844, "percentage": 93.43, "elapsed_time": "13:42:07", "remaining_time": "0:57:49"} +{"current_steps": 3783, "total_steps": 4048, "loss": 0.22146770358085632, "lr": 2.3525027750448959e-07, "epoch": 1.8693933028543186, "percentage": 93.45, "elapsed_time": "13:42:20", "remaining_time": "0:57:36"} +{"current_steps": 3784, "total_steps": 4048, "loss": 0.23873519897460938, "lr": 2.3349170605089456e-07, "epoch": 1.8698875571481528, "percentage": 93.48, "elapsed_time": "13:42:32", "remaining_time": "0:57:23"} +{"current_steps": 3785, "total_steps": 4048, "loss": 0.2530808746814728, "lr": 2.3173965457868875e-07, "epoch": 1.870381811441987, "percentage": 93.5, "elapsed_time": "13:42:46", "remaining_time": "0:57:10"} +{"current_steps": 3786, "total_steps": 4048, "loss": 0.21616236865520477, "lr": 2.2999412425751987e-07, "epoch": 1.8708760657358212, "percentage": 93.53, "elapsed_time": "13:42:59", "remaining_time": "0:56:57"} +{"current_steps": 3787, "total_steps": 4048, "loss": 0.21596969664096832, "lr": 2.2825511625267583e-07, "epoch": 1.8713703200296554, "percentage": 93.55, "elapsed_time": "13:43:12", "remaining_time": "0:56:44"} +{"current_steps": 3788, "total_steps": 4048, "loss": 0.25873616337776184, "lr": 2.265226317250957e-07, "epoch": 1.8718645743234894, "percentage": 93.58, "elapsed_time": "13:43:25", "remaining_time": "0:56:31"} +{"current_steps": 3789, "total_steps": 4048, "loss": 
0.21096865832805634, "lr": 2.247966718313599e-07, "epoch": 1.8723588286173236, "percentage": 93.6, "elapsed_time": "13:43:38", "remaining_time": "0:56:18"} +{"current_steps": 3790, "total_steps": 4048, "loss": 0.2159111499786377, "lr": 2.230772377236956e-07, "epoch": 1.8728530829111578, "percentage": 93.63, "elapsed_time": "13:43:51", "remaining_time": "0:56:04"} +{"current_steps": 3791, "total_steps": 4048, "loss": 0.2264566719532013, "lr": 2.213643305499724e-07, "epoch": 1.8733473372049918, "percentage": 93.65, "elapsed_time": "13:44:04", "remaining_time": "0:55:51"} +{"current_steps": 3792, "total_steps": 4048, "loss": 0.216034397482872, "lr": 2.1965795145370338e-07, "epoch": 1.873841591498826, "percentage": 93.68, "elapsed_time": "13:44:17", "remaining_time": "0:55:38"} +{"current_steps": 3793, "total_steps": 4048, "loss": 0.22257745265960693, "lr": 2.1795810157404063e-07, "epoch": 1.8743358457926602, "percentage": 93.7, "elapsed_time": "13:44:31", "remaining_time": "0:55:25"} +{"current_steps": 3794, "total_steps": 4048, "loss": 0.2569161653518677, "lr": 2.1626478204578082e-07, "epoch": 1.8748301000864944, "percentage": 93.73, "elapsed_time": "13:44:43", "remaining_time": "0:55:12"} +{"current_steps": 3795, "total_steps": 4048, "loss": 0.24172556400299072, "lr": 2.1457799399936087e-07, "epoch": 1.8753243543803286, "percentage": 93.75, "elapsed_time": "13:44:57", "remaining_time": "0:54:59"} +{"current_steps": 3796, "total_steps": 4048, "loss": 0.25539106130599976, "lr": 2.128977385608555e-07, "epoch": 1.8758186086741628, "percentage": 93.77, "elapsed_time": "13:45:09", "remaining_time": "0:54:46"} +{"current_steps": 3797, "total_steps": 4048, "loss": 0.23766650259494781, "lr": 2.1122401685197747e-07, "epoch": 1.876312862967997, "percentage": 93.8, "elapsed_time": "13:45:23", "remaining_time": "0:54:33"} +{"current_steps": 3798, "total_steps": 4048, "loss": 0.24102288484573364, "lr": 2.095568299900841e-07, "epoch": 1.8768071172618312, "percentage": 93.82, 
"elapsed_time": "13:45:35", "remaining_time": "0:54:20"} +{"current_steps": 3799, "total_steps": 4048, "loss": 0.25168395042419434, "lr": 2.0789617908816063e-07, "epoch": 1.8773013715556655, "percentage": 93.85, "elapsed_time": "13:45:48", "remaining_time": "0:54:07"} +{"current_steps": 3800, "total_steps": 4048, "loss": 0.23417149484157562, "lr": 2.0624206525483582e-07, "epoch": 1.8777956258494997, "percentage": 93.87, "elapsed_time": "13:46:01", "remaining_time": "0:53:54"} +{"current_steps": 3801, "total_steps": 4048, "loss": 0.2875264883041382, "lr": 2.04594489594373e-07, "epoch": 1.8782898801433339, "percentage": 93.9, "elapsed_time": "13:46:20", "remaining_time": "0:53:41"} +{"current_steps": 3802, "total_steps": 4048, "loss": 0.24828693270683289, "lr": 2.0295345320667014e-07, "epoch": 1.878784134437168, "percentage": 93.92, "elapsed_time": "13:46:33", "remaining_time": "0:53:28"} +{"current_steps": 3803, "total_steps": 4048, "loss": 0.23279064893722534, "lr": 2.013189571872587e-07, "epoch": 1.879278388731002, "percentage": 93.95, "elapsed_time": "13:46:46", "remaining_time": "0:53:15"} +{"current_steps": 3804, "total_steps": 4048, "loss": 0.2099420577287674, "lr": 1.996910026273058e-07, "epoch": 1.8797726430248363, "percentage": 93.97, "elapsed_time": "13:47:00", "remaining_time": "0:53:02"} +{"current_steps": 3805, "total_steps": 4048, "loss": 0.25043174624443054, "lr": 1.9806959061360985e-07, "epoch": 1.8802668973186705, "percentage": 94.0, "elapsed_time": "13:47:13", "remaining_time": "0:52:49"} +{"current_steps": 3806, "total_steps": 4048, "loss": 0.2606011927127838, "lr": 1.9645472222860286e-07, "epoch": 1.8807611516125047, "percentage": 94.02, "elapsed_time": "13:47:26", "remaining_time": "0:52:36"} +{"current_steps": 3807, "total_steps": 4048, "loss": 0.22487565875053406, "lr": 1.948463985503468e-07, "epoch": 1.8812554059063387, "percentage": 94.05, "elapsed_time": "13:47:40", "remaining_time": "0:52:23"} +{"current_steps": 3808, "total_steps": 4048, 
"loss": 0.29611343145370483, "lr": 1.9324462065253735e-07, "epoch": 1.8817496602001729, "percentage": 94.07, "elapsed_time": "13:47:53", "remaining_time": "0:52:10"} +{"current_steps": 3809, "total_steps": 4048, "loss": 0.2301706224679947, "lr": 1.9164938960449685e-07, "epoch": 1.882243914494007, "percentage": 94.1, "elapsed_time": "13:48:06", "remaining_time": "0:51:57"} +{"current_steps": 3810, "total_steps": 4048, "loss": 0.2306794822216034, "lr": 1.9006070647118015e-07, "epoch": 1.8827381687878413, "percentage": 94.12, "elapsed_time": "13:48:20", "remaining_time": "0:51:44"} +{"current_steps": 3811, "total_steps": 4048, "loss": 0.2588786482810974, "lr": 1.884785723131688e-07, "epoch": 1.8832324230816755, "percentage": 94.15, "elapsed_time": "13:48:33", "remaining_time": "0:51:31"} +{"current_steps": 3812, "total_steps": 4048, "loss": 0.2795346677303314, "lr": 1.8690298818667463e-07, "epoch": 1.8837266773755097, "percentage": 94.17, "elapsed_time": "13:48:46", "remaining_time": "0:51:18"} +{"current_steps": 3813, "total_steps": 4048, "loss": 0.2313271164894104, "lr": 1.853339551435318e-07, "epoch": 1.884220931669344, "percentage": 94.19, "elapsed_time": "13:49:00", "remaining_time": "0:51:05"} +{"current_steps": 3814, "total_steps": 4048, "loss": 0.22814632952213287, "lr": 1.8377147423120467e-07, "epoch": 1.884715185963178, "percentage": 94.22, "elapsed_time": "13:49:13", "remaining_time": "0:50:52"} +{"current_steps": 3815, "total_steps": 4048, "loss": 0.2605836093425751, "lr": 1.822155464927866e-07, "epoch": 1.8852094402570123, "percentage": 94.24, "elapsed_time": "13:49:26", "remaining_time": "0:50:39"} +{"current_steps": 3816, "total_steps": 4048, "loss": 0.23902952671051025, "lr": 1.8066617296699007e-07, "epoch": 1.8857036945508465, "percentage": 94.27, "elapsed_time": "13:49:39", "remaining_time": "0:50:26"} +{"current_steps": 3817, "total_steps": 4048, "loss": 0.24895761907100677, "lr": 1.7912335468815545e-07, "epoch": 1.8861979488446807, "percentage": 
94.29, "elapsed_time": "13:49:52", "remaining_time": "0:50:13"} +{"current_steps": 3818, "total_steps": 4048, "loss": 0.24108648300170898, "lr": 1.7758709268624664e-07, "epoch": 1.8866922031385147, "percentage": 94.32, "elapsed_time": "13:50:05", "remaining_time": "0:50:00"} +{"current_steps": 3819, "total_steps": 4048, "loss": 0.2600073516368866, "lr": 1.7605738798684767e-07, "epoch": 1.887186457432349, "percentage": 94.34, "elapsed_time": "13:50:19", "remaining_time": "0:49:47"} +{"current_steps": 3820, "total_steps": 4048, "loss": 0.21564190089702606, "lr": 1.745342416111706e-07, "epoch": 1.8876807117261831, "percentage": 94.37, "elapsed_time": "13:50:32", "remaining_time": "0:49:34"} +{"current_steps": 3821, "total_steps": 4048, "loss": 0.24080556631088257, "lr": 1.7301765457604647e-07, "epoch": 1.8881749660200173, "percentage": 94.39, "elapsed_time": "13:50:45", "remaining_time": "0:49:21"} +{"current_steps": 3822, "total_steps": 4048, "loss": 0.22631056606769562, "lr": 1.7150762789392316e-07, "epoch": 1.8886692203138513, "percentage": 94.42, "elapsed_time": "13:50:58", "remaining_time": "0:49:08"} +{"current_steps": 3823, "total_steps": 4048, "loss": 0.26355087757110596, "lr": 1.7000416257287654e-07, "epoch": 1.8891634746076855, "percentage": 94.44, "elapsed_time": "13:51:11", "remaining_time": "0:48:55"} +{"current_steps": 3824, "total_steps": 4048, "loss": 0.248369500041008, "lr": 1.685072596165982e-07, "epoch": 1.8896577289015197, "percentage": 94.47, "elapsed_time": "13:51:24", "remaining_time": "0:48:42"} +{"current_steps": 3825, "total_steps": 4048, "loss": 0.2789249122142792, "lr": 1.670169200243976e-07, "epoch": 1.890151983195354, "percentage": 94.49, "elapsed_time": "13:51:38", "remaining_time": "0:48:29"} +{"current_steps": 3826, "total_steps": 4048, "loss": 0.22493675351142883, "lr": 1.6553314479120453e-07, "epoch": 1.8906462374891881, "percentage": 94.52, "elapsed_time": "13:51:51", "remaining_time": "0:48:16"} +{"current_steps": 3827, 
"total_steps": 4048, "loss": 0.21274074912071228, "lr": 1.6405593490756766e-07, "epoch": 1.8911404917830223, "percentage": 94.54, "elapsed_time": "13:52:04", "remaining_time": "0:48:03"} +{"current_steps": 3828, "total_steps": 4048, "loss": 0.2591193914413452, "lr": 1.6258529135964928e-07, "epoch": 1.8916347460768566, "percentage": 94.57, "elapsed_time": "13:52:17", "remaining_time": "0:47:49"} +{"current_steps": 3829, "total_steps": 4048, "loss": 0.2791387140750885, "lr": 1.6112121512923075e-07, "epoch": 1.8921290003706908, "percentage": 94.59, "elapsed_time": "13:52:31", "remaining_time": "0:47:36"} +{"current_steps": 3830, "total_steps": 4048, "loss": 0.2840545177459717, "lr": 1.5966370719371015e-07, "epoch": 1.892623254664525, "percentage": 94.61, "elapsed_time": "13:52:44", "remaining_time": "0:47:23"} +{"current_steps": 3831, "total_steps": 4048, "loss": 0.2563555836677551, "lr": 1.582127685260948e-07, "epoch": 1.8931175089583592, "percentage": 94.64, "elapsed_time": "13:52:57", "remaining_time": "0:47:10"} +{"current_steps": 3832, "total_steps": 4048, "loss": 0.22912704944610596, "lr": 1.5676840009501538e-07, "epoch": 1.8936117632521934, "percentage": 94.66, "elapsed_time": "13:53:11", "remaining_time": "0:46:57"} +{"current_steps": 3833, "total_steps": 4048, "loss": 0.25490787625312805, "lr": 1.5533060286470837e-07, "epoch": 1.8941060175460276, "percentage": 94.69, "elapsed_time": "13:53:24", "remaining_time": "0:46:44"} +{"current_steps": 3834, "total_steps": 4048, "loss": 0.21826709806919098, "lr": 1.5389937779502818e-07, "epoch": 1.8946002718398616, "percentage": 94.71, "elapsed_time": "13:53:38", "remaining_time": "0:46:31"} +{"current_steps": 3835, "total_steps": 4048, "loss": 0.2292749583721161, "lr": 1.524747258414394e-07, "epoch": 1.8950945261336958, "percentage": 94.74, "elapsed_time": "13:53:51", "remaining_time": "0:46:18"} +{"current_steps": 3836, "total_steps": 4048, "loss": 0.24652332067489624, "lr": 1.5105664795501908e-07, "epoch": 
1.89558878042753, "percentage": 94.76, "elapsed_time": "13:54:04", "remaining_time": "0:46:05"} +{"current_steps": 3837, "total_steps": 4048, "loss": 0.25154706835746765, "lr": 1.4964514508245652e-07, "epoch": 1.8960830347213642, "percentage": 94.79, "elapsed_time": "13:54:17", "remaining_time": "0:45:52"} +{"current_steps": 3838, "total_steps": 4048, "loss": 0.2414158582687378, "lr": 1.482402181660525e-07, "epoch": 1.8965772890151982, "percentage": 94.81, "elapsed_time": "13:54:31", "remaining_time": "0:45:39"} +{"current_steps": 3839, "total_steps": 4048, "loss": 0.22421908378601074, "lr": 1.4684186814371225e-07, "epoch": 1.8970715433090324, "percentage": 94.84, "elapsed_time": "13:54:44", "remaining_time": "0:45:26"} +{"current_steps": 3840, "total_steps": 4048, "loss": 0.2506029009819031, "lr": 1.4545009594895687e-07, "epoch": 1.8975657976028666, "percentage": 94.86, "elapsed_time": "13:54:57", "remaining_time": "0:45:13"} +{"current_steps": 3841, "total_steps": 4048, "loss": 0.2011726200580597, "lr": 1.440649025109142e-07, "epoch": 1.8980600518967008, "percentage": 94.89, "elapsed_time": "13:55:10", "remaining_time": "0:45:00"} +{"current_steps": 3842, "total_steps": 4048, "loss": 0.27702796459198, "lr": 1.4268628875431677e-07, "epoch": 1.898554306190535, "percentage": 94.91, "elapsed_time": "13:55:24", "remaining_time": "0:44:47"} +{"current_steps": 3843, "total_steps": 4048, "loss": 0.23884715139865875, "lr": 1.413142555995095e-07, "epoch": 1.8990485604843692, "percentage": 94.94, "elapsed_time": "13:55:37", "remaining_time": "0:44:34"} +{"current_steps": 3844, "total_steps": 4048, "loss": 0.2191702425479889, "lr": 1.3994880396244304e-07, "epoch": 1.8995428147782034, "percentage": 94.96, "elapsed_time": "13:55:51", "remaining_time": "0:44:21"} +{"current_steps": 3845, "total_steps": 4048, "loss": 0.25425833463668823, "lr": 1.385899347546704e-07, "epoch": 1.9000370690720376, "percentage": 94.99, "elapsed_time": "13:56:04", "remaining_time": "0:44:08"} 
+{"current_steps": 3846, "total_steps": 4048, "loss": 0.23355990648269653, "lr": 1.37237648883356e-07, "epoch": 1.9005313233658718, "percentage": 95.01, "elapsed_time": "13:56:18", "remaining_time": "0:43:55"} +{"current_steps": 3847, "total_steps": 4048, "loss": 0.2079685628414154, "lr": 1.3589194725126542e-07, "epoch": 1.901025577659706, "percentage": 95.03, "elapsed_time": "13:56:30", "remaining_time": "0:43:42"} +{"current_steps": 3848, "total_steps": 4048, "loss": 0.25126928091049194, "lr": 1.3455283075676895e-07, "epoch": 1.9015198319535402, "percentage": 95.06, "elapsed_time": "13:56:44", "remaining_time": "0:43:29"} +{"current_steps": 3849, "total_steps": 4048, "loss": 0.2608864903450012, "lr": 1.332203002938437e-07, "epoch": 1.9020140862473742, "percentage": 95.08, "elapsed_time": "13:56:58", "remaining_time": "0:43:16"} +{"current_steps": 3850, "total_steps": 4048, "loss": 0.27048414945602417, "lr": 1.3189435675206697e-07, "epoch": 1.9025083405412084, "percentage": 95.11, "elapsed_time": "13:57:11", "remaining_time": "0:43:03"} +{"current_steps": 3851, "total_steps": 4048, "loss": 0.24350577592849731, "lr": 1.3057500101661846e-07, "epoch": 1.9030025948350426, "percentage": 95.13, "elapsed_time": "13:57:24", "remaining_time": "0:42:50"} +{"current_steps": 3852, "total_steps": 4048, "loss": 0.23283880949020386, "lr": 1.2926223396828363e-07, "epoch": 1.9034968491288768, "percentage": 95.16, "elapsed_time": "13:57:37", "remaining_time": "0:42:37"} +{"current_steps": 3853, "total_steps": 4048, "loss": 0.23332493007183075, "lr": 1.2795605648344477e-07, "epoch": 1.9039911034227108, "percentage": 95.18, "elapsed_time": "13:57:51", "remaining_time": "0:42:24"} +{"current_steps": 3854, "total_steps": 4048, "loss": 0.19833901524543762, "lr": 1.2665646943408882e-07, "epoch": 1.904485357716545, "percentage": 95.21, "elapsed_time": "13:58:04", "remaining_time": "0:42:11"} +{"current_steps": 3855, "total_steps": 4048, "loss": 0.23650333285331726, "lr": 
1.2536347368780066e-07, "epoch": 1.9049796120103792, "percentage": 95.23, "elapsed_time": "13:58:18", "remaining_time": "0:41:58"} +{"current_steps": 3856, "total_steps": 4048, "loss": 0.20151859521865845, "lr": 1.240770701077665e-07, "epoch": 1.9054738663042134, "percentage": 95.26, "elapsed_time": "13:58:31", "remaining_time": "0:41:45"} +{"current_steps": 3857, "total_steps": 4048, "loss": 0.32347559928894043, "lr": 1.2279725955277044e-07, "epoch": 1.9059681205980477, "percentage": 95.28, "elapsed_time": "13:58:44", "remaining_time": "0:41:32"} +{"current_steps": 3858, "total_steps": 4048, "loss": 0.25937923789024353, "lr": 1.215240428771969e-07, "epoch": 1.9064623748918819, "percentage": 95.31, "elapsed_time": "13:58:58", "remaining_time": "0:41:19"} +{"current_steps": 3859, "total_steps": 4048, "loss": 0.2648822069168091, "lr": 1.2025742093102477e-07, "epoch": 1.906956629185716, "percentage": 95.33, "elapsed_time": "13:59:11", "remaining_time": "0:41:06"} +{"current_steps": 3860, "total_steps": 4048, "loss": 0.27612054347991943, "lr": 1.1899739455983327e-07, "epoch": 1.9074508834795503, "percentage": 95.36, "elapsed_time": "13:59:24", "remaining_time": "0:40:52"} +{"current_steps": 3861, "total_steps": 4048, "loss": 0.2204264998435974, "lr": 1.1774396460480064e-07, "epoch": 1.9079451377733845, "percentage": 95.38, "elapsed_time": "13:59:38", "remaining_time": "0:40:39"} +{"current_steps": 3862, "total_steps": 4048, "loss": 0.2719968557357788, "lr": 1.164971319026964e-07, "epoch": 1.9084393920672187, "percentage": 95.41, "elapsed_time": "13:59:51", "remaining_time": "0:40:26"} +{"current_steps": 3863, "total_steps": 4048, "loss": 0.2308243364095688, "lr": 1.1525689728588807e-07, "epoch": 1.908933646361053, "percentage": 95.43, "elapsed_time": "14:00:05", "remaining_time": "0:40:13"} +{"current_steps": 3864, "total_steps": 4048, "loss": 0.23281638324260712, "lr": 1.1402326158234e-07, "epoch": 1.909427900654887, "percentage": 95.45, "elapsed_time": "14:00:18", 
"remaining_time": "0:40:00"} +{"current_steps": 3865, "total_steps": 4048, "loss": 0.26273444294929504, "lr": 1.127962256156101e-07, "epoch": 1.909922154948721, "percentage": 95.48, "elapsed_time": "14:00:31", "remaining_time": "0:39:47"} +{"current_steps": 3866, "total_steps": 4048, "loss": 0.26783496141433716, "lr": 1.1157579020484755e-07, "epoch": 1.9104164092425553, "percentage": 95.5, "elapsed_time": "14:00:45", "remaining_time": "0:39:34"} +{"current_steps": 3867, "total_steps": 4048, "loss": 0.2575075626373291, "lr": 1.1036195616480061e-07, "epoch": 1.9109106635363895, "percentage": 95.53, "elapsed_time": "14:00:58", "remaining_time": "0:39:21"} +{"current_steps": 3868, "total_steps": 4048, "loss": 0.24802085757255554, "lr": 1.0915472430580443e-07, "epoch": 1.9114049178302237, "percentage": 95.55, "elapsed_time": "14:01:11", "remaining_time": "0:39:08"} +{"current_steps": 3869, "total_steps": 4048, "loss": 0.22017821669578552, "lr": 1.0795409543379099e-07, "epoch": 1.9118991721240577, "percentage": 95.58, "elapsed_time": "14:01:24", "remaining_time": "0:38:55"} +{"current_steps": 3870, "total_steps": 4048, "loss": 0.2525743246078491, "lr": 1.0676007035028579e-07, "epoch": 1.912393426417892, "percentage": 95.6, "elapsed_time": "14:01:38", "remaining_time": "0:38:42"} +{"current_steps": 3871, "total_steps": 4048, "loss": 0.26704782247543335, "lr": 1.05572649852399e-07, "epoch": 1.912887680711726, "percentage": 95.63, "elapsed_time": "14:01:51", "remaining_time": "0:38:29"} +{"current_steps": 3872, "total_steps": 4048, "loss": 0.25393134355545044, "lr": 1.0439183473283654e-07, "epoch": 1.9133819350055603, "percentage": 95.65, "elapsed_time": "14:02:04", "remaining_time": "0:38:16"} +{"current_steps": 3873, "total_steps": 4048, "loss": 0.27266988158226013, "lr": 1.0321762577989448e-07, "epoch": 1.9138761892993945, "percentage": 95.68, "elapsed_time": "14:02:17", "remaining_time": "0:38:03"} +{"current_steps": 3874, "total_steps": 4048, "loss": 
0.2694425582885742, "lr": 1.0205002377745799e-07, "epoch": 1.9143704435932287, "percentage": 95.7, "elapsed_time": "14:02:30", "remaining_time": "0:37:50"} +{"current_steps": 3875, "total_steps": 4048, "loss": 0.28820598125457764, "lr": 1.0088902950500023e-07, "epoch": 1.914864697887063, "percentage": 95.73, "elapsed_time": "14:02:43", "remaining_time": "0:37:37"} +{"current_steps": 3876, "total_steps": 4048, "loss": 0.2194051444530487, "lr": 9.973464373758679e-08, "epoch": 1.9153589521808971, "percentage": 95.75, "elapsed_time": "14:02:56", "remaining_time": "0:37:24"} +{"current_steps": 3877, "total_steps": 4048, "loss": 0.25639402866363525, "lr": 9.858686724586675e-08, "epoch": 1.9158532064747313, "percentage": 95.78, "elapsed_time": "14:03:10", "remaining_time": "0:37:11"} +{"current_steps": 3878, "total_steps": 4048, "loss": 0.23420584201812744, "lr": 9.744570079608051e-08, "epoch": 1.9163474607685655, "percentage": 95.8, "elapsed_time": "14:03:23", "remaining_time": "0:36:58"} +{"current_steps": 3879, "total_steps": 4048, "loss": 0.2514578700065613, "lr": 9.631114515005425e-08, "epoch": 1.9168417150623998, "percentage": 95.83, "elapsed_time": "14:03:37", "remaining_time": "0:36:45"} +{"current_steps": 3880, "total_steps": 4048, "loss": 0.2223532646894455, "lr": 9.518320106520096e-08, "epoch": 1.9173359693562337, "percentage": 95.85, "elapsed_time": "14:03:50", "remaining_time": "0:36:32"} +{"current_steps": 3881, "total_steps": 4048, "loss": 0.21725934743881226, "lr": 9.406186929451943e-08, "epoch": 1.917830223650068, "percentage": 95.87, "elapsed_time": "14:04:03", "remaining_time": "0:36:19"} +{"current_steps": 3882, "total_steps": 4048, "loss": 0.2081519365310669, "lr": 9.294715058659531e-08, "epoch": 1.9183244779439022, "percentage": 95.9, "elapsed_time": "14:04:17", "remaining_time": "0:36:06"} +{"current_steps": 3883, "total_steps": 4048, "loss": 0.23683780431747437, "lr": 9.183904568559998e-08, "epoch": 1.9188187322377364, "percentage": 95.92, 
"elapsed_time": "14:04:30", "remaining_time": "0:35:53"} +{"current_steps": 3884, "total_steps": 4048, "loss": 0.26095467805862427, "lr": 9.073755533128725e-08, "epoch": 1.9193129865315703, "percentage": 95.95, "elapsed_time": "14:04:43", "remaining_time": "0:35:40"} +{"current_steps": 3885, "total_steps": 4048, "loss": 0.24427568912506104, "lr": 8.964268025899558e-08, "epoch": 1.9198072408254045, "percentage": 95.97, "elapsed_time": "14:04:56", "remaining_time": "0:35:27"} +{"current_steps": 3886, "total_steps": 4048, "loss": 0.23549365997314453, "lr": 8.855442119964919e-08, "epoch": 1.9203014951192388, "percentage": 96.0, "elapsed_time": "14:05:10", "remaining_time": "0:35:14"} +{"current_steps": 3887, "total_steps": 4048, "loss": 0.2645740807056427, "lr": 8.74727788797547e-08, "epoch": 1.920795749413073, "percentage": 96.02, "elapsed_time": "14:05:23", "remaining_time": "0:35:00"} +{"current_steps": 3888, "total_steps": 4048, "loss": 0.22890612483024597, "lr": 8.639775402139894e-08, "epoch": 1.9212900037069072, "percentage": 96.05, "elapsed_time": "14:05:37", "remaining_time": "0:34:47"} +{"current_steps": 3889, "total_steps": 4048, "loss": 0.23417067527770996, "lr": 8.532934734225451e-08, "epoch": 1.9217842580007414, "percentage": 96.07, "elapsed_time": "14:05:50", "remaining_time": "0:34:34"} +{"current_steps": 3890, "total_steps": 4048, "loss": 0.26125872135162354, "lr": 8.42675595555753e-08, "epoch": 1.9222785122945756, "percentage": 96.1, "elapsed_time": "14:06:03", "remaining_time": "0:34:21"} +{"current_steps": 3891, "total_steps": 4048, "loss": 0.26559343934059143, "lr": 8.321239137019433e-08, "epoch": 1.9227727665884098, "percentage": 96.12, "elapsed_time": "14:06:16", "remaining_time": "0:34:08"} +{"current_steps": 3892, "total_steps": 4048, "loss": 0.2033136785030365, "lr": 8.216384349052809e-08, "epoch": 1.923267020882244, "percentage": 96.15, "elapsed_time": "14:06:30", "remaining_time": "0:33:55"} +{"current_steps": 3893, "total_steps": 4048, 
"loss": 0.2750868797302246, "lr": 8.112191661656999e-08, "epoch": 1.9237612751760782, "percentage": 96.17, "elapsed_time": "14:06:42", "remaining_time": "0:33:42"} +{"current_steps": 3894, "total_steps": 4048, "loss": 0.2082993984222412, "lr": 8.008661144389807e-08, "epoch": 1.9242555294699124, "percentage": 96.2, "elapsed_time": "14:06:56", "remaining_time": "0:33:29"} +{"current_steps": 3895, "total_steps": 4048, "loss": 0.2495439350605011, "lr": 7.905792866366501e-08, "epoch": 1.9247497837637466, "percentage": 96.22, "elapsed_time": "14:07:09", "remaining_time": "0:33:16"} +{"current_steps": 3896, "total_steps": 4048, "loss": 0.25609591603279114, "lr": 7.803586896260707e-08, "epoch": 1.9252440380575806, "percentage": 96.25, "elapsed_time": "14:07:22", "remaining_time": "0:33:03"} +{"current_steps": 3897, "total_steps": 4048, "loss": 0.25372135639190674, "lr": 7.702043302303397e-08, "epoch": 1.9257382923514148, "percentage": 96.27, "elapsed_time": "14:07:35", "remaining_time": "0:32:50"} +{"current_steps": 3898, "total_steps": 4048, "loss": 0.21882784366607666, "lr": 7.601162152283904e-08, "epoch": 1.926232546645249, "percentage": 96.29, "elapsed_time": "14:07:49", "remaining_time": "0:32:37"} +{"current_steps": 3899, "total_steps": 4048, "loss": 0.24513296782970428, "lr": 7.500943513548797e-08, "epoch": 1.926726800939083, "percentage": 96.32, "elapsed_time": "14:08:02", "remaining_time": "0:32:24"} +{"current_steps": 3900, "total_steps": 4048, "loss": 0.23508042097091675, "lr": 7.401387453002673e-08, "epoch": 1.9272210552329172, "percentage": 96.34, "elapsed_time": "14:08:15", "remaining_time": "0:32:11"} +{"current_steps": 3901, "total_steps": 4048, "loss": 0.2288282811641693, "lr": 7.30249403710792e-08, "epoch": 1.9277153095267514, "percentage": 96.37, "elapsed_time": "14:08:34", "remaining_time": "0:31:58"} +{"current_steps": 3902, "total_steps": 4048, "loss": 0.24606133997440338, "lr": 7.204263331884175e-08, "epoch": 1.9282095638205856, "percentage": 96.39, 
"elapsed_time": "14:08:47", "remaining_time": "0:31:45"} +{"current_steps": 3903, "total_steps": 4048, "loss": 0.2710507810115814, "lr": 7.10669540290887e-08, "epoch": 1.9287038181144198, "percentage": 96.42, "elapsed_time": "14:09:00", "remaining_time": "0:31:32"} +{"current_steps": 3904, "total_steps": 4048, "loss": 0.27333927154541016, "lr": 7.009790315317122e-08, "epoch": 1.929198072408254, "percentage": 96.44, "elapsed_time": "14:09:14", "remaining_time": "0:31:19"} +{"current_steps": 3905, "total_steps": 4048, "loss": 0.27518531680107117, "lr": 6.913548133801074e-08, "epoch": 1.9296923267020882, "percentage": 96.47, "elapsed_time": "14:09:27", "remaining_time": "0:31:06"} +{"current_steps": 3906, "total_steps": 4048, "loss": 0.24289458990097046, "lr": 6.817968922610884e-08, "epoch": 1.9301865809959224, "percentage": 96.49, "elapsed_time": "14:09:40", "remaining_time": "0:30:53"} +{"current_steps": 3907, "total_steps": 4048, "loss": 0.225175678730011, "lr": 6.723052745553848e-08, "epoch": 1.9306808352897566, "percentage": 96.52, "elapsed_time": "14:09:54", "remaining_time": "0:30:40"} +{"current_steps": 3908, "total_steps": 4048, "loss": 0.2592085599899292, "lr": 6.628799665994612e-08, "epoch": 1.9311750895835909, "percentage": 96.54, "elapsed_time": "14:10:07", "remaining_time": "0:30:27"} +{"current_steps": 3909, "total_steps": 4048, "loss": 0.2649756968021393, "lr": 6.535209746855064e-08, "epoch": 1.931669343877425, "percentage": 96.57, "elapsed_time": "14:10:20", "remaining_time": "0:30:14"} +{"current_steps": 3910, "total_steps": 4048, "loss": 0.2318311631679535, "lr": 6.442283050614673e-08, "epoch": 1.9321635981712593, "percentage": 96.59, "elapsed_time": "14:10:33", "remaining_time": "0:30:01"} +{"current_steps": 3911, "total_steps": 4048, "loss": 0.252924382686615, "lr": 6.350019639309923e-08, "epoch": 1.9326578524650933, "percentage": 96.62, "elapsed_time": "14:10:46", "remaining_time": "0:29:48"} +{"current_steps": 3912, "total_steps": 4048, "loss": 
0.1903652548789978, "lr": 6.258419574534547e-08, "epoch": 1.9331521067589275, "percentage": 96.64, "elapsed_time": "14:10:59", "remaining_time": "0:29:35"} +{"current_steps": 3913, "total_steps": 4048, "loss": 0.22795221209526062, "lr": 6.167482917439404e-08, "epoch": 1.9336463610527617, "percentage": 96.67, "elapsed_time": "14:11:13", "remaining_time": "0:29:22"} +{"current_steps": 3914, "total_steps": 4048, "loss": 0.26521584391593933, "lr": 6.077209728732492e-08, "epoch": 1.9341406153465959, "percentage": 96.69, "elapsed_time": "14:11:25", "remaining_time": "0:29:08"} +{"current_steps": 3915, "total_steps": 4048, "loss": 0.22152049839496613, "lr": 5.987600068679045e-08, "epoch": 1.9346348696404299, "percentage": 96.71, "elapsed_time": "14:11:39", "remaining_time": "0:28:55"} +{"current_steps": 3916, "total_steps": 4048, "loss": 0.22663083672523499, "lr": 5.898653997100989e-08, "epoch": 1.935129123934264, "percentage": 96.74, "elapsed_time": "14:11:52", "remaining_time": "0:28:42"} +{"current_steps": 3917, "total_steps": 4048, "loss": 0.23720389604568481, "lr": 5.8103715733776047e-08, "epoch": 1.9356233782280983, "percentage": 96.76, "elapsed_time": "14:12:06", "remaining_time": "0:28:29"} +{"current_steps": 3918, "total_steps": 4048, "loss": 0.24053935706615448, "lr": 5.722752856444858e-08, "epoch": 1.9361176325219325, "percentage": 96.79, "elapsed_time": "14:12:18", "remaining_time": "0:28:16"} +{"current_steps": 3919, "total_steps": 4048, "loss": 0.26565641164779663, "lr": 5.635797904795848e-08, "epoch": 1.9366118868157667, "percentage": 96.81, "elapsed_time": "14:12:32", "remaining_time": "0:28:03"} +{"current_steps": 3920, "total_steps": 4048, "loss": 0.27181264758110046, "lr": 5.5495067764804736e-08, "epoch": 1.9371061411096009, "percentage": 96.84, "elapsed_time": "14:12:45", "remaining_time": "0:27:50"} +{"current_steps": 3921, "total_steps": 4048, "loss": 0.23340710997581482, "lr": 5.46387952910532e-08, "epoch": 1.937600395403435, "percentage": 96.86, 
"elapsed_time": "14:12:58", "remaining_time": "0:27:37"} +{"current_steps": 3922, "total_steps": 4048, "loss": 0.19458985328674316, "lr": 5.378916219833996e-08, "epoch": 1.9380946496972693, "percentage": 96.89, "elapsed_time": "14:13:11", "remaining_time": "0:27:24"} +{"current_steps": 3923, "total_steps": 4048, "loss": 0.22900202870368958, "lr": 5.2946169053869066e-08, "epoch": 1.9385889039911035, "percentage": 96.91, "elapsed_time": "14:13:25", "remaining_time": "0:27:11"} +{"current_steps": 3924, "total_steps": 4048, "loss": 0.30710160732269287, "lr": 5.210981642040924e-08, "epoch": 1.9390831582849377, "percentage": 96.94, "elapsed_time": "14:13:38", "remaining_time": "0:26:58"} +{"current_steps": 3925, "total_steps": 4048, "loss": 0.1754809319972992, "lr": 5.12801048562972e-08, "epoch": 1.939577412578772, "percentage": 96.96, "elapsed_time": "14:13:51", "remaining_time": "0:26:45"} +{"current_steps": 3926, "total_steps": 4048, "loss": 0.28787121176719666, "lr": 5.045703491543763e-08, "epoch": 1.940071666872606, "percentage": 96.99, "elapsed_time": "14:14:04", "remaining_time": "0:26:32"} +{"current_steps": 3927, "total_steps": 4048, "loss": 0.2239963263273239, "lr": 4.96406071472999e-08, "epoch": 1.94056592116644, "percentage": 97.01, "elapsed_time": "14:14:17", "remaining_time": "0:26:19"} +{"current_steps": 3928, "total_steps": 4048, "loss": 0.2121300995349884, "lr": 4.883082209692025e-08, "epoch": 1.9410601754602743, "percentage": 97.04, "elapsed_time": "14:14:31", "remaining_time": "0:26:06"} +{"current_steps": 3929, "total_steps": 4048, "loss": 0.23445773124694824, "lr": 4.802768030489735e-08, "epoch": 1.9415544297541085, "percentage": 97.06, "elapsed_time": "14:14:44", "remaining_time": "0:25:53"} +{"current_steps": 3930, "total_steps": 4048, "loss": 0.2369021326303482, "lr": 4.7231182307400095e-08, "epoch": 1.9420486840479425, "percentage": 97.08, "elapsed_time": "14:14:57", "remaining_time": "0:25:40"} +{"current_steps": 3931, "total_steps": 4048, 
"loss": 0.2764047086238861, "lr": 4.644132863615758e-08, "epoch": 1.9425429383417767, "percentage": 97.11, "elapsed_time": "14:15:10", "remaining_time": "0:25:27"} +{"current_steps": 3932, "total_steps": 4048, "loss": 0.26021280884742737, "lr": 4.565811981846468e-08, "epoch": 1.943037192635611, "percentage": 97.13, "elapsed_time": "14:15:23", "remaining_time": "0:25:14"} +{"current_steps": 3933, "total_steps": 4048, "loss": 0.26012706756591797, "lr": 4.488155637718095e-08, "epoch": 1.9435314469294451, "percentage": 97.16, "elapsed_time": "14:15:36", "remaining_time": "0:25:01"} +{"current_steps": 3934, "total_steps": 4048, "loss": 0.22092604637145996, "lr": 4.4111638830729444e-08, "epoch": 1.9440257012232793, "percentage": 97.18, "elapsed_time": "14:15:49", "remaining_time": "0:24:48"} +{"current_steps": 3935, "total_steps": 4048, "loss": 0.24200648069381714, "lr": 4.334836769309347e-08, "epoch": 1.9445199555171135, "percentage": 97.21, "elapsed_time": "14:16:02", "remaining_time": "0:24:34"} +{"current_steps": 3936, "total_steps": 4048, "loss": 0.2545608580112457, "lr": 4.2591743473826554e-08, "epoch": 1.9450142098109477, "percentage": 97.23, "elapsed_time": "14:16:16", "remaining_time": "0:24:21"} +{"current_steps": 3937, "total_steps": 4048, "loss": 0.24908477067947388, "lr": 4.1841766678036854e-08, "epoch": 1.945508464104782, "percentage": 97.26, "elapsed_time": "14:16:29", "remaining_time": "0:24:08"} +{"current_steps": 3938, "total_steps": 4048, "loss": 0.23568233847618103, "lr": 4.109843780639833e-08, "epoch": 1.9460027183986162, "percentage": 97.28, "elapsed_time": "14:16:42", "remaining_time": "0:23:55"} +{"current_steps": 3939, "total_steps": 4048, "loss": 0.2230791449546814, "lr": 4.0361757355147355e-08, "epoch": 1.9464969726924504, "percentage": 97.31, "elapsed_time": "14:16:55", "remaining_time": "0:23:42"} +{"current_steps": 3940, "total_steps": 4048, "loss": 0.2541523277759552, "lr": 3.963172581608166e-08, "epoch": 1.9469912269862846, "percentage": 
97.33, "elapsed_time": "14:17:08", "remaining_time": "0:23:29"} +{"current_steps": 3941, "total_steps": 4048, "loss": 0.2466837763786316, "lr": 3.8908343676559156e-08, "epoch": 1.9474854812801188, "percentage": 97.36, "elapsed_time": "14:17:21", "remaining_time": "0:23:16"} +{"current_steps": 3942, "total_steps": 4048, "loss": 0.2700938880443573, "lr": 3.819161141950134e-08, "epoch": 1.9479797355739528, "percentage": 97.38, "elapsed_time": "14:17:34", "remaining_time": "0:23:03"} +{"current_steps": 3943, "total_steps": 4048, "loss": 0.2353779673576355, "lr": 3.7481529523384355e-08, "epoch": 1.948473989867787, "percentage": 97.41, "elapsed_time": "14:17:47", "remaining_time": "0:22:50"} +{"current_steps": 3944, "total_steps": 4048, "loss": 0.25708913803100586, "lr": 3.677809846225344e-08, "epoch": 1.9489682441616212, "percentage": 97.43, "elapsed_time": "14:18:00", "remaining_time": "0:22:37"} +{"current_steps": 3945, "total_steps": 4048, "loss": 0.26113903522491455, "lr": 3.6081318705705195e-08, "epoch": 1.9494624984554554, "percentage": 97.46, "elapsed_time": "14:18:13", "remaining_time": "0:22:24"} +{"current_steps": 3946, "total_steps": 4048, "loss": 0.2561355531215668, "lr": 3.539119071890307e-08, "epoch": 1.9499567527492894, "percentage": 97.48, "elapsed_time": "14:18:26", "remaining_time": "0:22:11"} +{"current_steps": 3947, "total_steps": 4048, "loss": 0.24893885850906372, "lr": 3.470771496256409e-08, "epoch": 1.9504510070431236, "percentage": 97.5, "elapsed_time": "14:18:39", "remaining_time": "0:21:58"} +{"current_steps": 3948, "total_steps": 4048, "loss": 0.25399699807167053, "lr": 3.403089189296771e-08, "epoch": 1.9509452613369578, "percentage": 97.53, "elapsed_time": "14:18:52", "remaining_time": "0:21:45"} +{"current_steps": 3949, "total_steps": 4048, "loss": 0.2820609509944916, "lr": 3.3360721961952505e-08, "epoch": 1.951439515630792, "percentage": 97.55, "elapsed_time": "14:19:05", "remaining_time": "0:21:32"} +{"current_steps": 3950, "total_steps": 
4048, "loss": 0.22128066420555115, "lr": 3.269720561691281e-08, "epoch": 1.9519337699246262, "percentage": 97.58, "elapsed_time": "14:19:18", "remaining_time": "0:21:19"} +{"current_steps": 3951, "total_steps": 4048, "loss": 0.2132534235715866, "lr": 3.204034330080319e-08, "epoch": 1.9524280242184604, "percentage": 97.6, "elapsed_time": "14:19:31", "remaining_time": "0:21:06"} +{"current_steps": 3952, "total_steps": 4048, "loss": 0.2308463454246521, "lr": 3.1390135452135095e-08, "epoch": 1.9529222785122946, "percentage": 97.63, "elapsed_time": "14:19:44", "remaining_time": "0:20:53"} +{"current_steps": 3953, "total_steps": 4048, "loss": 0.2756718397140503, "lr": 3.074658250497908e-08, "epoch": 1.9534165328061288, "percentage": 97.65, "elapsed_time": "14:19:57", "remaining_time": "0:20:40"} +{"current_steps": 3954, "total_steps": 4048, "loss": 0.24619412422180176, "lr": 3.010968488896149e-08, "epoch": 1.953910787099963, "percentage": 97.68, "elapsed_time": "14:20:11", "remaining_time": "0:20:26"} +{"current_steps": 3955, "total_steps": 4048, "loss": 0.2164454162120819, "lr": 2.9479443029265532e-08, "epoch": 1.9544050413937972, "percentage": 97.7, "elapsed_time": "14:20:23", "remaining_time": "0:20:13"} +{"current_steps": 3956, "total_steps": 4048, "loss": 0.2778991460800171, "lr": 2.8855857346632432e-08, "epoch": 1.9548992956876314, "percentage": 97.73, "elapsed_time": "14:20:36", "remaining_time": "0:20:00"} +{"current_steps": 3957, "total_steps": 4048, "loss": 0.22639301419258118, "lr": 2.8238928257359188e-08, "epoch": 1.9553935499814654, "percentage": 97.75, "elapsed_time": "14:20:49", "remaining_time": "0:19:47"} +{"current_steps": 3958, "total_steps": 4048, "loss": 0.2367630898952484, "lr": 2.7628656173297463e-08, "epoch": 1.9558878042752996, "percentage": 97.78, "elapsed_time": "14:21:02", "remaining_time": "0:19:34"} +{"current_steps": 3959, "total_steps": 4048, "loss": 0.2400333285331726, "lr": 2.702504150185692e-08, "epoch": 1.9563820585691338, 
"percentage": 97.8, "elapsed_time": "14:21:15", "remaining_time": "0:19:21"} +{"current_steps": 3960, "total_steps": 4048, "loss": 0.2384340763092041, "lr": 2.6428084646001884e-08, "epoch": 1.956876312862968, "percentage": 97.83, "elapsed_time": "14:21:27", "remaining_time": "0:19:08"} +{"current_steps": 3961, "total_steps": 4048, "loss": 0.20191673934459686, "lr": 2.5837786004253572e-08, "epoch": 1.957370567156802, "percentage": 97.85, "elapsed_time": "14:21:40", "remaining_time": "0:18:55"} +{"current_steps": 3962, "total_steps": 4048, "loss": 0.24700434505939484, "lr": 2.525414597068565e-08, "epoch": 1.9578648214506362, "percentage": 97.88, "elapsed_time": "14:21:53", "remaining_time": "0:18:42"} +{"current_steps": 3963, "total_steps": 4048, "loss": 0.20032359659671783, "lr": 2.4677164934928665e-08, "epoch": 1.9583590757444704, "percentage": 97.9, "elapsed_time": "14:22:06", "remaining_time": "0:18:29"} +{"current_steps": 3964, "total_steps": 4048, "loss": 0.280154287815094, "lr": 2.4106843282165615e-08, "epoch": 1.9588533300383046, "percentage": 97.92, "elapsed_time": "14:22:19", "remaining_time": "0:18:16"} +{"current_steps": 3965, "total_steps": 4048, "loss": 0.25518566370010376, "lr": 2.3543181393135274e-08, "epoch": 1.9593475843321388, "percentage": 97.95, "elapsed_time": "14:22:32", "remaining_time": "0:18:03"} +{"current_steps": 3966, "total_steps": 4048, "loss": 0.2246837019920349, "lr": 2.298617964413108e-08, "epoch": 1.959841838625973, "percentage": 97.97, "elapsed_time": "14:22:45", "remaining_time": "0:17:50"} +{"current_steps": 3967, "total_steps": 4048, "loss": 0.23355916142463684, "lr": 2.2435838407000034e-08, "epoch": 1.9603360929198073, "percentage": 98.0, "elapsed_time": "14:22:58", "remaining_time": "0:17:37"} +{"current_steps": 3968, "total_steps": 4048, "loss": 0.2449415922164917, "lr": 2.1892158049140467e-08, "epoch": 1.9608303472136415, "percentage": 98.02, "elapsed_time": "14:23:11", "remaining_time": "0:17:24"} +{"current_steps": 3969, 
"total_steps": 4048, "loss": 0.2269652783870697, "lr": 2.1355138933507602e-08, "epoch": 1.9613246015074757, "percentage": 98.05, "elapsed_time": "14:23:24", "remaining_time": "0:17:11"} +{"current_steps": 3970, "total_steps": 4048, "loss": 0.26923638582229614, "lr": 2.0824781418605776e-08, "epoch": 1.9618188558013099, "percentage": 98.07, "elapsed_time": "14:23:37", "remaining_time": "0:16:58"} +{"current_steps": 3971, "total_steps": 4048, "loss": 0.2631189823150635, "lr": 2.0301085858493996e-08, "epoch": 1.962313110095144, "percentage": 98.1, "elapsed_time": "14:23:50", "remaining_time": "0:16:45"} +{"current_steps": 3972, "total_steps": 4048, "loss": 0.23281526565551758, "lr": 1.978405260278593e-08, "epoch": 1.9628073643889783, "percentage": 98.12, "elapsed_time": "14:24:04", "remaining_time": "0:16:31"} +{"current_steps": 3973, "total_steps": 4048, "loss": 0.26399385929107666, "lr": 1.9273681996644365e-08, "epoch": 1.9633016186828123, "percentage": 98.15, "elapsed_time": "14:24:16", "remaining_time": "0:16:18"} +{"current_steps": 3974, "total_steps": 4048, "loss": 0.2641673684120178, "lr": 1.876997438078454e-08, "epoch": 1.9637958729766465, "percentage": 98.17, "elapsed_time": "14:24:30", "remaining_time": "0:16:05"} +{"current_steps": 3975, "total_steps": 4048, "loss": 0.22440402209758759, "lr": 1.8272930091476347e-08, "epoch": 1.9642901272704807, "percentage": 98.2, "elapsed_time": "14:24:42", "remaining_time": "0:15:52"} +{"current_steps": 3976, "total_steps": 4048, "loss": 0.2552195191383362, "lr": 1.778254946053881e-08, "epoch": 1.9647843815643147, "percentage": 98.22, "elapsed_time": "14:24:55", "remaining_time": "0:15:39"} +{"current_steps": 3977, "total_steps": 4048, "loss": 0.24455100297927856, "lr": 1.729883281534117e-08, "epoch": 1.9652786358581489, "percentage": 98.25, "elapsed_time": "14:25:08", "remaining_time": "0:15:26"} +{"current_steps": 3978, "total_steps": 4048, "loss": 0.2324603945016861, "lr": 1.6821780478808448e-08, "epoch": 
1.965772890151983, "percentage": 98.27, "elapsed_time": "14:25:21", "remaining_time": "0:15:13"} +{"current_steps": 3979, "total_steps": 4048, "loss": 0.25488242506980896, "lr": 1.6351392769412556e-08, "epoch": 1.9662671444458173, "percentage": 98.3, "elapsed_time": "14:25:33", "remaining_time": "0:15:00"} +{"current_steps": 3980, "total_steps": 4048, "loss": 0.23511120676994324, "lr": 1.5887670001177856e-08, "epoch": 1.9667613987396515, "percentage": 98.32, "elapsed_time": "14:25:47", "remaining_time": "0:14:47"} +{"current_steps": 3981, "total_steps": 4048, "loss": 0.2683457136154175, "lr": 1.5430612483680052e-08, "epoch": 1.9672556530334857, "percentage": 98.34, "elapsed_time": "14:25:59", "remaining_time": "0:14:34"} +{"current_steps": 3982, "total_steps": 4048, "loss": 0.26627787947654724, "lr": 1.4980220522041734e-08, "epoch": 1.96774990732732, "percentage": 98.37, "elapsed_time": "14:26:12", "remaining_time": "0:14:21"} +{"current_steps": 3983, "total_steps": 4048, "loss": 0.22931841015815735, "lr": 1.4536494416940162e-08, "epoch": 1.9682441616211541, "percentage": 98.39, "elapsed_time": "14:26:25", "remaining_time": "0:14:08"} +{"current_steps": 3984, "total_steps": 4048, "loss": 0.22918352484703064, "lr": 1.4099434464600603e-08, "epoch": 1.9687384159149883, "percentage": 98.42, "elapsed_time": "14:26:39", "remaining_time": "0:13:55"} +{"current_steps": 3985, "total_steps": 4048, "loss": 0.2542854845523834, "lr": 1.3669040956797442e-08, "epoch": 1.9692326702088225, "percentage": 98.44, "elapsed_time": "14:26:52", "remaining_time": "0:13:42"} +{"current_steps": 3986, "total_steps": 4048, "loss": 0.21581681072711945, "lr": 1.3245314180854175e-08, "epoch": 1.9697269245026567, "percentage": 98.47, "elapsed_time": "14:27:05", "remaining_time": "0:13:29"} +{"current_steps": 3987, "total_steps": 4048, "loss": 0.2708613872528076, "lr": 1.2828254419646746e-08, "epoch": 1.970221178796491, "percentage": 98.49, "elapsed_time": "14:27:17", "remaining_time": "0:13:16"} 
+{"current_steps": 3988, "total_steps": 4048, "loss": 0.25348716974258423, "lr": 1.2417861951597998e-08, "epoch": 1.970715433090325, "percentage": 98.52, "elapsed_time": "14:27:31", "remaining_time": "0:13:03"} +{"current_steps": 3989, "total_steps": 4048, "loss": 0.24585089087486267, "lr": 1.2014137050677665e-08, "epoch": 1.9712096873841591, "percentage": 98.54, "elapsed_time": "14:27:43", "remaining_time": "0:12:50"} +{"current_steps": 3990, "total_steps": 4048, "loss": 0.26362112164497375, "lr": 1.1617079986410152e-08, "epoch": 1.9717039416779933, "percentage": 98.57, "elapsed_time": "14:27:57", "remaining_time": "0:12:37"} +{"current_steps": 3991, "total_steps": 4048, "loss": 0.23288659751415253, "lr": 1.1226691023862312e-08, "epoch": 1.9721981959718276, "percentage": 98.59, "elapsed_time": "14:28:09", "remaining_time": "0:12:23"} +{"current_steps": 3992, "total_steps": 4048, "loss": 0.21604478359222412, "lr": 1.0842970423654563e-08, "epoch": 1.9726924502656615, "percentage": 98.62, "elapsed_time": "14:28:23", "remaining_time": "0:12:10"} +{"current_steps": 3993, "total_steps": 4048, "loss": 0.21149985492229462, "lr": 1.0465918441950885e-08, "epoch": 1.9731867045594957, "percentage": 98.64, "elapsed_time": "14:28:36", "remaining_time": "0:11:57"} +{"current_steps": 3994, "total_steps": 4048, "loss": 0.26392504572868347, "lr": 1.0095535330467698e-08, "epoch": 1.97368095885333, "percentage": 98.67, "elapsed_time": "14:28:49", "remaining_time": "0:11:44"} +{"current_steps": 3995, "total_steps": 4048, "loss": 0.22993823885917664, "lr": 9.731821336466107e-09, "epoch": 1.9741752131471642, "percentage": 98.69, "elapsed_time": "14:29:02", "remaining_time": "0:11:31"} +{"current_steps": 3996, "total_steps": 4048, "loss": 0.2207789570093155, "lr": 9.374776702757438e-09, "epoch": 1.9746694674409984, "percentage": 98.72, "elapsed_time": "14:29:15", "remaining_time": "0:11:18"} +{"current_steps": 3997, "total_steps": 4048, "loss": 0.27149268984794617, "lr": 
9.024401667698802e-09, "epoch": 1.9751637217348326, "percentage": 98.74, "elapsed_time": "14:29:29", "remaining_time": "0:11:05"} +{"current_steps": 3998, "total_steps": 4048, "loss": 0.269406795501709, "lr": 8.680696465196425e-09, "epoch": 1.9756579760286668, "percentage": 98.76, "elapsed_time": "14:29:41", "remaining_time": "0:10:52"} +{"current_steps": 3999, "total_steps": 4048, "loss": 0.25354713201522827, "lr": 8.343661324703434e-09, "epoch": 1.976152230322501, "percentage": 98.79, "elapsed_time": "14:29:55", "remaining_time": "0:10:39"} +{"current_steps": 4000, "total_steps": 4048, "loss": 0.22957751154899597, "lr": 8.013296471217624e-09, "epoch": 1.9766464846163352, "percentage": 98.81, "elapsed_time": "14:30:07", "remaining_time": "0:10:26"} +{"current_steps": 4001, "total_steps": 4048, "loss": 0.25355982780456543, "lr": 7.68960212528702e-09, "epoch": 1.9771407389101694, "percentage": 98.84, "elapsed_time": "14:30:27", "remaining_time": "0:10:13"} +{"current_steps": 4002, "total_steps": 4048, "loss": 0.2453315556049347, "lr": 7.372578503005434e-09, "epoch": 1.9776349932040036, "percentage": 98.86, "elapsed_time": "14:30:40", "remaining_time": "0:10:00"} +{"current_steps": 4003, "total_steps": 4048, "loss": 0.2274405062198639, "lr": 7.062225816013568e-09, "epoch": 1.9781292474978376, "percentage": 98.89, "elapsed_time": "14:30:54", "remaining_time": "0:09:47"} +{"current_steps": 4004, "total_steps": 4048, "loss": 0.22195965051651, "lr": 6.7585442714979136e-09, "epoch": 1.9786235017916718, "percentage": 98.91, "elapsed_time": "14:31:06", "remaining_time": "0:09:34"} +{"current_steps": 4005, "total_steps": 4048, "loss": 0.18664966523647308, "lr": 6.461534072191855e-09, "epoch": 1.979117756085506, "percentage": 98.94, "elapsed_time": "14:31:20", "remaining_time": "0:09:21"} +{"current_steps": 4006, "total_steps": 4048, "loss": 0.23385149240493774, "lr": 6.171195416375675e-09, "epoch": 1.9796120103793402, "percentage": 98.96, "elapsed_time": "14:31:33", 
"remaining_time": "0:09:08"} +{"current_steps": 4007, "total_steps": 4048, "loss": 0.3141595721244812, "lr": 5.887528497874328e-09, "epoch": 1.9801062646731742, "percentage": 98.99, "elapsed_time": "14:31:46", "remaining_time": "0:08:55"} +{"current_steps": 4008, "total_steps": 4048, "loss": 0.22392721474170685, "lr": 5.610533506060778e-09, "epoch": 1.9806005189670084, "percentage": 99.01, "elapsed_time": "14:31:59", "remaining_time": "0:08:42"} +{"current_steps": 4009, "total_steps": 4048, "loss": 0.24451547861099243, "lr": 5.34021062585377e-09, "epoch": 1.9810947732608426, "percentage": 99.04, "elapsed_time": "14:32:12", "remaining_time": "0:08:29"} +{"current_steps": 4010, "total_steps": 4048, "loss": 0.2916273772716522, "lr": 5.076560037714506e-09, "epoch": 1.9815890275546768, "percentage": 99.06, "elapsed_time": "14:32:26", "remaining_time": "0:08:16"} +{"current_steps": 4011, "total_steps": 4048, "loss": 0.25200486183166504, "lr": 4.819581917654414e-09, "epoch": 1.982083281848511, "percentage": 99.09, "elapsed_time": "14:32:38", "remaining_time": "0:08:02"} +{"current_steps": 4012, "total_steps": 4048, "loss": 0.24660873413085938, "lr": 4.569276437227377e-09, "epoch": 1.9825775361423452, "percentage": 99.11, "elapsed_time": "14:32:52", "remaining_time": "0:07:49"} +{"current_steps": 4013, "total_steps": 4048, "loss": 0.22276514768600464, "lr": 4.325643763534171e-09, "epoch": 1.9830717904361794, "percentage": 99.14, "elapsed_time": "14:33:05", "remaining_time": "0:07:36"} +{"current_steps": 4014, "total_steps": 4048, "loss": 0.28938305377960205, "lr": 4.088684059220249e-09, "epoch": 1.9835660447300136, "percentage": 99.16, "elapsed_time": "14:33:18", "remaining_time": "0:07:23"} +{"current_steps": 4015, "total_steps": 4048, "loss": 0.24640555679798126, "lr": 3.85839748247685e-09, "epoch": 1.9840602990238478, "percentage": 99.18, "elapsed_time": "14:33:31", "remaining_time": "0:07:10"} +{"current_steps": 4016, "total_steps": 4048, "loss": 0.24476927518844604, 
"lr": 3.6347841870398858e-09, "epoch": 1.984554553317682, "percentage": 99.21, "elapsed_time": "14:33:45", "remaining_time": "0:06:57"} +{"current_steps": 4017, "total_steps": 4048, "loss": 0.22534328699111938, "lr": 3.417844322189945e-09, "epoch": 1.9850488076115163, "percentage": 99.23, "elapsed_time": "14:33:58", "remaining_time": "0:06:44"} +{"current_steps": 4018, "total_steps": 4048, "loss": 0.25029847025871277, "lr": 3.2075780327534e-09, "epoch": 1.9855430619053505, "percentage": 99.26, "elapsed_time": "14:34:11", "remaining_time": "0:06:31"} +{"current_steps": 4019, "total_steps": 4048, "loss": 0.25584423542022705, "lr": 3.0039854591012994e-09, "epoch": 1.9860373161991844, "percentage": 99.28, "elapsed_time": "14:34:24", "remaining_time": "0:06:18"} +{"current_steps": 4020, "total_steps": 4048, "loss": 0.26148709654808044, "lr": 2.8070667371493663e-09, "epoch": 1.9865315704930187, "percentage": 99.31, "elapsed_time": "14:34:37", "remaining_time": "0:06:05"} +{"current_steps": 4021, "total_steps": 4048, "loss": 0.2510269284248352, "lr": 2.6168219983557786e-09, "epoch": 1.9870258247868529, "percentage": 99.33, "elapsed_time": "14:34:50", "remaining_time": "0:05:52"} +{"current_steps": 4022, "total_steps": 4048, "loss": 0.24044418334960938, "lr": 2.433251369727829e-09, "epoch": 1.987520079080687, "percentage": 99.36, "elapsed_time": "14:35:03", "remaining_time": "0:05:39"} +{"current_steps": 4023, "total_steps": 4048, "loss": 0.25793880224227905, "lr": 2.256354973813046e-09, "epoch": 1.988014333374521, "percentage": 99.38, "elapsed_time": "14:35:17", "remaining_time": "0:05:26"} +{"current_steps": 4024, "total_steps": 4048, "loss": 0.23637095093727112, "lr": 2.086132928705853e-09, "epoch": 1.9885085876683553, "percentage": 99.41, "elapsed_time": "14:35:29", "remaining_time": "0:05:13"} +{"current_steps": 4025, "total_steps": 4048, "loss": 0.29570624232292175, "lr": 1.9225853480431267e-09, "epoch": 1.9890028419621895, "percentage": 99.43, "elapsed_time": 
"14:35:43", "remaining_time": "0:05:00"} +{"current_steps": 4026, "total_steps": 4048, "loss": 0.2697899341583252, "lr": 1.7657123410075306e-09, "epoch": 1.9894970962560237, "percentage": 99.46, "elapsed_time": "14:35:56", "remaining_time": "0:04:47"} +{"current_steps": 4027, "total_steps": 4048, "loss": 0.28011834621429443, "lr": 1.615514012324182e-09, "epoch": 1.9899913505498579, "percentage": 99.48, "elapsed_time": "14:36:09", "remaining_time": "0:04:34"} +{"current_steps": 4028, "total_steps": 4048, "loss": 0.2770778238773346, "lr": 1.4719904622650939e-09, "epoch": 1.990485604843692, "percentage": 99.51, "elapsed_time": "14:36:22", "remaining_time": "0:04:21"} +{"current_steps": 4029, "total_steps": 4048, "loss": 0.2662060558795929, "lr": 1.335141786642513e-09, "epoch": 1.9909798591375263, "percentage": 99.53, "elapsed_time": "14:36:35", "remaining_time": "0:04:08"} +{"current_steps": 4030, "total_steps": 4048, "loss": 0.21188628673553467, "lr": 1.2049680768166928e-09, "epoch": 1.9914741134313605, "percentage": 99.56, "elapsed_time": "14:36:49", "remaining_time": "0:03:54"} +{"current_steps": 4031, "total_steps": 4048, "loss": 0.2749127745628357, "lr": 1.0814694196892294e-09, "epoch": 1.9919683677251947, "percentage": 99.58, "elapsed_time": "14:37:02", "remaining_time": "0:03:41"} +{"current_steps": 4032, "total_steps": 4048, "loss": 0.2271629124879837, "lr": 9.64645897704175e-10, "epoch": 1.992462622019029, "percentage": 99.6, "elapsed_time": "14:37:15", "remaining_time": "0:03:28"} +{"current_steps": 4033, "total_steps": 4048, "loss": 0.2704155743122101, "lr": 8.544975888535867e-10, "epoch": 1.9929568763128631, "percentage": 99.63, "elapsed_time": "14:37:28", "remaining_time": "0:03:15"} +{"current_steps": 4034, "total_steps": 4048, "loss": 0.23073506355285645, "lr": 7.51024566670866e-10, "epoch": 1.993451130606697, "percentage": 99.65, "elapsed_time": "14:37:41", "remaining_time": "0:03:02"} +{"current_steps": 4035, "total_steps": 4048, "loss": 
0.21597059071063995, "lr": 6.542269002307589e-10, "epoch": 1.9939453849005313, "percentage": 99.68, "elapsed_time": "14:37:54", "remaining_time": "0:02:49"} +{"current_steps": 4036, "total_steps": 4048, "loss": 0.21368899941444397, "lr": 5.641046541560169e-10, "epoch": 1.9944396391943655, "percentage": 99.7, "elapsed_time": "14:38:08", "remaining_time": "0:02:36"} +{"current_steps": 4037, "total_steps": 4048, "loss": 0.2594050168991089, "lr": 4.806578886107361e-10, "epoch": 1.9949338934881997, "percentage": 99.73, "elapsed_time": "14:38:21", "remaining_time": "0:02:23"} +{"current_steps": 4038, "total_steps": 4048, "loss": 0.21630799770355225, "lr": 4.03886659302577e-10, "epoch": 1.9954281477820337, "percentage": 99.75, "elapsed_time": "14:38:34", "remaining_time": "0:02:10"} +{"current_steps": 4039, "total_steps": 4048, "loss": 0.25168266892433167, "lr": 3.337910174827652e-10, "epoch": 1.995922402075868, "percentage": 99.78, "elapsed_time": "14:38:47", "remaining_time": "0:01:57"} +{"current_steps": 4040, "total_steps": 4048, "loss": 0.27963966131210327, "lr": 2.70371009946091e-10, "epoch": 1.9964166563697021, "percentage": 99.8, "elapsed_time": "14:39:01", "remaining_time": "0:01:44"} +{"current_steps": 4041, "total_steps": 4048, "loss": 0.2196345329284668, "lr": 2.1362667903090938e-10, "epoch": 1.9969109106635363, "percentage": 99.83, "elapsed_time": "14:39:14", "remaining_time": "0:01:31"} +{"current_steps": 4042, "total_steps": 4048, "loss": 0.22677919268608093, "lr": 1.6355806262025043e-10, "epoch": 1.9974051649573705, "percentage": 99.85, "elapsed_time": "14:39:27", "remaining_time": "0:01:18"} +{"current_steps": 4043, "total_steps": 4048, "loss": 0.23044565320014954, "lr": 1.2016519413626804e-10, "epoch": 1.9978994192512047, "percentage": 99.88, "elapsed_time": "14:39:40", "remaining_time": "0:01:05"} +{"current_steps": 4044, "total_steps": 4048, "loss": 0.2739316523075104, "lr": 8.344810255023206e-11, "epoch": 1.998393673545039, "percentage": 99.9, 
"elapsed_time": "14:39:54", "remaining_time": "0:00:52"} +{"current_steps": 4045, "total_steps": 4048, "loss": 0.2610301673412323, "lr": 5.3406812372536196e-11, "epoch": 1.9988879278388731, "percentage": 99.93, "elapsed_time": "14:40:07", "remaining_time": "0:00:39"} +{"current_steps": 4046, "total_steps": 4048, "loss": 0.23652175068855286, "lr": 3.004134365824918e-11, "epoch": 1.9993821821327074, "percentage": 99.95, "elapsed_time": "14:40:20", "remaining_time": "0:00:26"} +{"current_steps": 4047, "total_steps": 4048, "loss": 0.23983967304229736, "lr": 1.3351712006004492e-11, "epoch": 1.9998764364265416, "percentage": 99.98, "elapsed_time": "14:40:34", "remaining_time": "0:00:13"} +{"current_steps": 4048, "total_steps": 4048, "loss": 0.2640778720378876, "lr": 3.337928559110637e-12, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "14:40:36", "remaining_time": "0:00:00"} +{"current_steps": 4048, "total_steps": 4048, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "14:40:42", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f284c249728884dd953c87789bea8a6aeda022b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,28379 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 4048, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0004942542938341777, + "grad_norm": 1.7827389996067007, + "learning_rate": 0.0, + "loss": 1.1816105842590332, + "step": 1 + }, + { + "epoch": 0.0009885085876683553, + "grad_norm": 1.891128580111598, + "learning_rate": 9.852216748768474e-08, + "loss": 1.1496102809906006, + "step": 2 + }, + { + "epoch": 0.001482762881502533, + "grad_norm": 1.8581340535316004, + "learning_rate": 1.9704433497536947e-07, + "loss": 1.1515967845916748, + "step": 3 + }, + { + "epoch": 
0.0019770171753367106, + "grad_norm": 1.708604556953044, + "learning_rate": 2.955665024630542e-07, + "loss": 1.1795943975448608, + "step": 4 + }, + { + "epoch": 0.0024712714691708885, + "grad_norm": 1.8513528590958555, + "learning_rate": 3.9408866995073894e-07, + "loss": 1.2289564609527588, + "step": 5 + }, + { + "epoch": 0.002965525763005066, + "grad_norm": 1.972324289049384, + "learning_rate": 4.926108374384237e-07, + "loss": 1.179269790649414, + "step": 6 + }, + { + "epoch": 0.003459780056839244, + "grad_norm": 1.8334156798400192, + "learning_rate": 5.911330049261084e-07, + "loss": 1.199608564376831, + "step": 7 + }, + { + "epoch": 0.003954034350673421, + "grad_norm": 1.6669436389627912, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1643707752227783, + "step": 8 + }, + { + "epoch": 0.004448288644507599, + "grad_norm": 1.8750060934609654, + "learning_rate": 7.881773399014779e-07, + "loss": 1.1264240741729736, + "step": 9 + }, + { + "epoch": 0.004942542938341777, + "grad_norm": 1.9962482953672744, + "learning_rate": 8.866995073891626e-07, + "loss": 1.1717555522918701, + "step": 10 + }, + { + "epoch": 0.005436797232175955, + "grad_norm": 1.895693583554434, + "learning_rate": 9.852216748768474e-07, + "loss": 1.1856712102890015, + "step": 11 + }, + { + "epoch": 0.005931051526010132, + "grad_norm": 1.7765248738469863, + "learning_rate": 1.0837438423645322e-06, + "loss": 1.1258785724639893, + "step": 12 + }, + { + "epoch": 0.00642530581984431, + "grad_norm": 1.8326605479421993, + "learning_rate": 1.1822660098522167e-06, + "loss": 1.1333656311035156, + "step": 13 + }, + { + "epoch": 0.006919560113678488, + "grad_norm": 1.9142537067819894, + "learning_rate": 1.2807881773399017e-06, + "loss": 1.2281363010406494, + "step": 14 + }, + { + "epoch": 0.0074138144075126654, + "grad_norm": 1.9232318367357442, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1910676956176758, + "step": 15 + }, + { + "epoch": 0.007908068701346842, + "grad_norm": 2.5599273269087885, + 
"learning_rate": 1.4778325123152712e-06, + "loss": 1.2124552726745605, + "step": 16 + }, + { + "epoch": 0.008402322995181021, + "grad_norm": 2.2109761155287133, + "learning_rate": 1.5763546798029558e-06, + "loss": 1.1993463039398193, + "step": 17 + }, + { + "epoch": 0.008896577289015198, + "grad_norm": 2.1999117305307077, + "learning_rate": 1.6748768472906405e-06, + "loss": 1.1245683431625366, + "step": 18 + }, + { + "epoch": 0.009390831582849375, + "grad_norm": 2.203478389299074, + "learning_rate": 1.7733990147783253e-06, + "loss": 1.1838568449020386, + "step": 19 + }, + { + "epoch": 0.009885085876683554, + "grad_norm": 2.419107047950166, + "learning_rate": 1.8719211822660098e-06, + "loss": 1.081169843673706, + "step": 20 + }, + { + "epoch": 0.010379340170517731, + "grad_norm": 2.559921706815215, + "learning_rate": 1.970443349753695e-06, + "loss": 1.1506569385528564, + "step": 21 + }, + { + "epoch": 0.01087359446435191, + "grad_norm": 2.8697838151244977, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0841327905654907, + "step": 22 + }, + { + "epoch": 0.011367848758186087, + "grad_norm": 2.8012936510978905, + "learning_rate": 2.1674876847290643e-06, + "loss": 1.1335525512695312, + "step": 23 + }, + { + "epoch": 0.011862103052020264, + "grad_norm": 2.649521736906966, + "learning_rate": 2.266009852216749e-06, + "loss": 1.035188913345337, + "step": 24 + }, + { + "epoch": 0.012356357345854442, + "grad_norm": 2.7385314170591166, + "learning_rate": 2.3645320197044334e-06, + "loss": 1.0640877485275269, + "step": 25 + }, + { + "epoch": 0.01285061163968862, + "grad_norm": 2.5011806151261755, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.0479273796081543, + "step": 26 + }, + { + "epoch": 0.013344865933522798, + "grad_norm": 2.236670838822209, + "learning_rate": 2.5615763546798034e-06, + "loss": 1.0522505044937134, + "step": 27 + }, + { + "epoch": 0.013839120227356975, + "grad_norm": 2.065544668093392, + "learning_rate": 2.660098522167488e-06, + "loss": 
1.080836296081543, + "step": 28 + }, + { + "epoch": 0.014333374521191152, + "grad_norm": 1.7478242928012908, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9712544679641724, + "step": 29 + }, + { + "epoch": 0.014827628815025331, + "grad_norm": 1.5930614486695707, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.0469061136245728, + "step": 30 + }, + { + "epoch": 0.015321883108859508, + "grad_norm": 1.380137621152324, + "learning_rate": 2.9556650246305424e-06, + "loss": 0.9911116361618042, + "step": 31 + }, + { + "epoch": 0.015816137402693685, + "grad_norm": 1.3167918112915387, + "learning_rate": 3.054187192118227e-06, + "loss": 0.9552959203720093, + "step": 32 + }, + { + "epoch": 0.016310391696527864, + "grad_norm": 1.2266567383194062, + "learning_rate": 3.1527093596059115e-06, + "loss": 0.957429051399231, + "step": 33 + }, + { + "epoch": 0.016804645990362042, + "grad_norm": 1.305011449405004, + "learning_rate": 3.2512315270935963e-06, + "loss": 1.0180628299713135, + "step": 34 + }, + { + "epoch": 0.017298900284196218, + "grad_norm": 1.2347397961596738, + "learning_rate": 3.349753694581281e-06, + "loss": 0.9064415097236633, + "step": 35 + }, + { + "epoch": 0.017793154578030396, + "grad_norm": 1.216758814553776, + "learning_rate": 3.448275862068966e-06, + "loss": 0.9718184471130371, + "step": 36 + }, + { + "epoch": 0.018287408871864575, + "grad_norm": 1.065779121444896, + "learning_rate": 3.5467980295566506e-06, + "loss": 0.8831444978713989, + "step": 37 + }, + { + "epoch": 0.01878166316569875, + "grad_norm": 1.0132491929086573, + "learning_rate": 3.6453201970443354e-06, + "loss": 0.9167139530181885, + "step": 38 + }, + { + "epoch": 0.01927591745953293, + "grad_norm": 1.0431186403983612, + "learning_rate": 3.7438423645320197e-06, + "loss": 0.9322037696838379, + "step": 39 + }, + { + "epoch": 0.019770171753367108, + "grad_norm": 1.0319066435292568, + "learning_rate": 3.842364532019705e-06, + "loss": 0.9189817905426025, + "step": 40 + }, + { + "epoch": 
0.020264426047201287, + "grad_norm": 1.1670657884595383, + "learning_rate": 3.94088669950739e-06, + "loss": 0.8480448126792908, + "step": 41 + }, + { + "epoch": 0.020758680341035462, + "grad_norm": 0.9850175889441174, + "learning_rate": 4.039408866995074e-06, + "loss": 0.8907301425933838, + "step": 42 + }, + { + "epoch": 0.02125293463486964, + "grad_norm": 1.0028387912933743, + "learning_rate": 4.137931034482759e-06, + "loss": 0.8674390316009521, + "step": 43 + }, + { + "epoch": 0.02174718892870382, + "grad_norm": 0.9822966394815191, + "learning_rate": 4.236453201970444e-06, + "loss": 0.8674882054328918, + "step": 44 + }, + { + "epoch": 0.022241443222537995, + "grad_norm": 0.9778327665239519, + "learning_rate": 4.334975369458129e-06, + "loss": 0.8542560338973999, + "step": 45 + }, + { + "epoch": 0.022735697516372173, + "grad_norm": 0.8621828386281931, + "learning_rate": 4.4334975369458135e-06, + "loss": 0.772778332233429, + "step": 46 + }, + { + "epoch": 0.023229951810206352, + "grad_norm": 0.8638093364937629, + "learning_rate": 4.532019704433498e-06, + "loss": 0.7481152415275574, + "step": 47 + }, + { + "epoch": 0.023724206104040527, + "grad_norm": 0.8467972866728939, + "learning_rate": 4.630541871921182e-06, + "loss": 0.8373709917068481, + "step": 48 + }, + { + "epoch": 0.024218460397874706, + "grad_norm": 0.8165134857986008, + "learning_rate": 4.729064039408867e-06, + "loss": 0.8163385391235352, + "step": 49 + }, + { + "epoch": 0.024712714691708885, + "grad_norm": 0.833026336683437, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.7444975972175598, + "step": 50 + }, + { + "epoch": 0.025206968985543064, + "grad_norm": 0.858591041664589, + "learning_rate": 4.926108374384237e-06, + "loss": 0.7683243751525879, + "step": 51 + }, + { + "epoch": 0.02570122327937724, + "grad_norm": 1.0127725906591662, + "learning_rate": 5.024630541871922e-06, + "loss": 0.806761622428894, + "step": 52 + }, + { + "epoch": 0.026195477573211418, + "grad_norm": 0.8333649125881921, + 
"learning_rate": 5.123152709359607e-06, + "loss": 0.7312102913856506, + "step": 53 + }, + { + "epoch": 0.026689731867045596, + "grad_norm": 0.9425883709792775, + "learning_rate": 5.2216748768472915e-06, + "loss": 0.7351999282836914, + "step": 54 + }, + { + "epoch": 0.02718398616087977, + "grad_norm": 0.9039627787948463, + "learning_rate": 5.320197044334976e-06, + "loss": 0.7453763484954834, + "step": 55 + }, + { + "epoch": 0.02767824045471395, + "grad_norm": 0.9324665454088699, + "learning_rate": 5.41871921182266e-06, + "loss": 0.7063292860984802, + "step": 56 + }, + { + "epoch": 0.02817249474854813, + "grad_norm": 0.8343256198457882, + "learning_rate": 5.517241379310345e-06, + "loss": 0.7145994901657104, + "step": 57 + }, + { + "epoch": 0.028666749042382304, + "grad_norm": 0.7157092163314197, + "learning_rate": 5.61576354679803e-06, + "loss": 0.687594473361969, + "step": 58 + }, + { + "epoch": 0.029161003336216483, + "grad_norm": 0.7603582128739335, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.6643895506858826, + "step": 59 + }, + { + "epoch": 0.029655257630050662, + "grad_norm": 0.6925073694472516, + "learning_rate": 5.812807881773399e-06, + "loss": 0.6781614422798157, + "step": 60 + }, + { + "epoch": 0.030149511923884837, + "grad_norm": 0.7169709854131228, + "learning_rate": 5.911330049261085e-06, + "loss": 0.6209158301353455, + "step": 61 + }, + { + "epoch": 0.030643766217719016, + "grad_norm": 0.6749920715098945, + "learning_rate": 6.00985221674877e-06, + "loss": 0.6424679756164551, + "step": 62 + }, + { + "epoch": 0.031138020511553195, + "grad_norm": 0.6435584468821339, + "learning_rate": 6.108374384236454e-06, + "loss": 0.6745971441268921, + "step": 63 + }, + { + "epoch": 0.03163227480538737, + "grad_norm": 0.657544191989632, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6520330905914307, + "step": 64 + }, + { + "epoch": 0.03212652909922155, + "grad_norm": 0.6351335823908374, + "learning_rate": 6.305418719211823e-06, + "loss": 
0.6790571212768555, + "step": 65 + }, + { + "epoch": 0.03262078339305573, + "grad_norm": 0.6484215339353426, + "learning_rate": 6.403940886699508e-06, + "loss": 0.6491506099700928, + "step": 66 + }, + { + "epoch": 0.033115037686889906, + "grad_norm": 0.617685895397393, + "learning_rate": 6.502463054187193e-06, + "loss": 0.6347313523292542, + "step": 67 + }, + { + "epoch": 0.033609291980724085, + "grad_norm": 0.6638567270691007, + "learning_rate": 6.600985221674877e-06, + "loss": 0.6785881519317627, + "step": 68 + }, + { + "epoch": 0.034103546274558263, + "grad_norm": 0.6459369268846485, + "learning_rate": 6.699507389162562e-06, + "loss": 0.6470085978507996, + "step": 69 + }, + { + "epoch": 0.034597800568392435, + "grad_norm": 0.6364523697931875, + "learning_rate": 6.798029556650246e-06, + "loss": 0.6205961108207703, + "step": 70 + }, + { + "epoch": 0.035092054862226614, + "grad_norm": 0.6434045969551643, + "learning_rate": 6.896551724137932e-06, + "loss": 0.6621580123901367, + "step": 71 + }, + { + "epoch": 0.03558630915606079, + "grad_norm": 0.6281362500041567, + "learning_rate": 6.995073891625616e-06, + "loss": 0.6363088488578796, + "step": 72 + }, + { + "epoch": 0.03608056344989497, + "grad_norm": 0.6023389614758552, + "learning_rate": 7.093596059113301e-06, + "loss": 0.6073004007339478, + "step": 73 + }, + { + "epoch": 0.03657481774372915, + "grad_norm": 0.5962790573618366, + "learning_rate": 7.192118226600986e-06, + "loss": 0.6490880846977234, + "step": 74 + }, + { + "epoch": 0.03706907203756333, + "grad_norm": 0.6425224117743127, + "learning_rate": 7.290640394088671e-06, + "loss": 0.6540624499320984, + "step": 75 + }, + { + "epoch": 0.0375633263313975, + "grad_norm": 0.6885040620745063, + "learning_rate": 7.3891625615763555e-06, + "loss": 0.6237976551055908, + "step": 76 + }, + { + "epoch": 0.03805758062523168, + "grad_norm": 0.6110947192931153, + "learning_rate": 7.487684729064039e-06, + "loss": 0.6121219992637634, + "step": 77 + }, + { + "epoch": 
0.03855183491906586, + "grad_norm": 0.6031847840211293, + "learning_rate": 7.586206896551724e-06, + "loss": 0.5785888433456421, + "step": 78 + }, + { + "epoch": 0.03904608921290004, + "grad_norm": 0.645073431050071, + "learning_rate": 7.68472906403941e-06, + "loss": 0.6144810914993286, + "step": 79 + }, + { + "epoch": 0.039540343506734216, + "grad_norm": 0.709404375816405, + "learning_rate": 7.783251231527095e-06, + "loss": 0.6522500514984131, + "step": 80 + }, + { + "epoch": 0.040034597800568394, + "grad_norm": 0.6784602446095636, + "learning_rate": 7.88177339901478e-06, + "loss": 0.6126501560211182, + "step": 81 + }, + { + "epoch": 0.04052885209440257, + "grad_norm": 0.6834338295248128, + "learning_rate": 7.980295566502464e-06, + "loss": 0.573388934135437, + "step": 82 + }, + { + "epoch": 0.041023106388236745, + "grad_norm": 0.7128627750045655, + "learning_rate": 8.078817733990149e-06, + "loss": 0.6462322473526001, + "step": 83 + }, + { + "epoch": 0.041517360682070924, + "grad_norm": 0.6985575396830678, + "learning_rate": 8.177339901477834e-06, + "loss": 0.6542905569076538, + "step": 84 + }, + { + "epoch": 0.0420116149759051, + "grad_norm": 0.6800738258763197, + "learning_rate": 8.275862068965518e-06, + "loss": 0.6539976000785828, + "step": 85 + }, + { + "epoch": 0.04250586926973928, + "grad_norm": 0.6805451756514653, + "learning_rate": 8.374384236453203e-06, + "loss": 0.6303049325942993, + "step": 86 + }, + { + "epoch": 0.04300012356357346, + "grad_norm": 0.6262637687675628, + "learning_rate": 8.472906403940888e-06, + "loss": 0.5727078318595886, + "step": 87 + }, + { + "epoch": 0.04349437785740764, + "grad_norm": 0.6392194157453778, + "learning_rate": 8.571428571428571e-06, + "loss": 0.6204914450645447, + "step": 88 + }, + { + "epoch": 0.04398863215124181, + "grad_norm": 0.8144620373591464, + "learning_rate": 8.669950738916257e-06, + "loss": 0.633359432220459, + "step": 89 + }, + { + "epoch": 0.04448288644507599, + "grad_norm": 0.6564252660453104, + 
"learning_rate": 8.768472906403942e-06, + "loss": 0.5737719535827637, + "step": 90 + }, + { + "epoch": 0.04497714073891017, + "grad_norm": 0.704224097621618, + "learning_rate": 8.866995073891627e-06, + "loss": 0.6438707709312439, + "step": 91 + }, + { + "epoch": 0.04547139503274435, + "grad_norm": 0.7123681566966987, + "learning_rate": 8.965517241379312e-06, + "loss": 0.6284823417663574, + "step": 92 + }, + { + "epoch": 0.045965649326578525, + "grad_norm": 0.6879682376399587, + "learning_rate": 9.064039408866996e-06, + "loss": 0.6442058086395264, + "step": 93 + }, + { + "epoch": 0.046459903620412704, + "grad_norm": 0.709934515039082, + "learning_rate": 9.162561576354681e-06, + "loss": 0.5821751356124878, + "step": 94 + }, + { + "epoch": 0.04695415791424688, + "grad_norm": 1.530236961676562, + "learning_rate": 9.261083743842364e-06, + "loss": 0.546042263507843, + "step": 95 + }, + { + "epoch": 0.047448412208081055, + "grad_norm": 0.6844457378175872, + "learning_rate": 9.359605911330049e-06, + "loss": 0.5743244886398315, + "step": 96 + }, + { + "epoch": 0.04794266650191523, + "grad_norm": 0.6876016450255833, + "learning_rate": 9.458128078817734e-06, + "loss": 0.5775831341743469, + "step": 97 + }, + { + "epoch": 0.04843692079574941, + "grad_norm": 0.6367125491834975, + "learning_rate": 9.55665024630542e-06, + "loss": 0.5632016658782959, + "step": 98 + }, + { + "epoch": 0.04893117508958359, + "grad_norm": 0.635357516984843, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5817564129829407, + "step": 99 + }, + { + "epoch": 0.04942542938341777, + "grad_norm": 0.6380730461382318, + "learning_rate": 9.75369458128079e-06, + "loss": 0.5692225098609924, + "step": 100 + }, + { + "epoch": 0.04991968367725195, + "grad_norm": 0.6016319910280624, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5239434242248535, + "step": 101 + }, + { + "epoch": 0.05041393797108613, + "grad_norm": 0.6757811368400487, + "learning_rate": 9.95073891625616e-06, + "loss": 0.543138861656189, + 
"step": 102 + }, + { + "epoch": 0.0509081922649203, + "grad_norm": 0.6907500926239555, + "learning_rate": 1.0049261083743844e-05, + "loss": 0.5914052128791809, + "step": 103 + }, + { + "epoch": 0.05140244655875448, + "grad_norm": 0.657964391130701, + "learning_rate": 1.0147783251231529e-05, + "loss": 0.5394442081451416, + "step": 104 + }, + { + "epoch": 0.051896700852588656, + "grad_norm": 0.6411875370567456, + "learning_rate": 1.0246305418719214e-05, + "loss": 0.6157902479171753, + "step": 105 + }, + { + "epoch": 0.052390955146422835, + "grad_norm": 0.738818036033501, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5863415598869324, + "step": 106 + }, + { + "epoch": 0.052885209440257014, + "grad_norm": 0.7066380161278255, + "learning_rate": 1.0443349753694583e-05, + "loss": 0.5783145427703857, + "step": 107 + }, + { + "epoch": 0.05337946373409119, + "grad_norm": 0.6486663261886427, + "learning_rate": 1.0541871921182268e-05, + "loss": 0.5761469006538391, + "step": 108 + }, + { + "epoch": 0.053873718027925364, + "grad_norm": 0.7011826885785277, + "learning_rate": 1.0640394088669953e-05, + "loss": 0.5931205749511719, + "step": 109 + }, + { + "epoch": 0.05436797232175954, + "grad_norm": 0.6624296231637669, + "learning_rate": 1.0738916256157637e-05, + "loss": 0.5429986119270325, + "step": 110 + }, + { + "epoch": 0.05486222661559372, + "grad_norm": 0.758180242025479, + "learning_rate": 1.083743842364532e-05, + "loss": 0.5154455304145813, + "step": 111 + }, + { + "epoch": 0.0553564809094279, + "grad_norm": 0.6631694030017043, + "learning_rate": 1.0935960591133005e-05, + "loss": 0.5465028285980225, + "step": 112 + }, + { + "epoch": 0.05585073520326208, + "grad_norm": 0.7234030186547562, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5973349213600159, + "step": 113 + }, + { + "epoch": 0.05634498949709626, + "grad_norm": 0.8062494007312124, + "learning_rate": 1.1133004926108375e-05, + "loss": 0.6201578378677368, + "step": 114 + }, + { + "epoch": 
0.05683924379093044, + "grad_norm": 0.7754913697435033, + "learning_rate": 1.123152709359606e-05, + "loss": 0.5090143084526062, + "step": 115 + }, + { + "epoch": 0.05733349808476461, + "grad_norm": 0.7128751966577052, + "learning_rate": 1.1330049261083744e-05, + "loss": 0.5275869369506836, + "step": 116 + }, + { + "epoch": 0.05782775237859879, + "grad_norm": 0.6950533949454222, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.571302056312561, + "step": 117 + }, + { + "epoch": 0.058322006672432966, + "grad_norm": 0.727683614551879, + "learning_rate": 1.1527093596059114e-05, + "loss": 0.5920293927192688, + "step": 118 + }, + { + "epoch": 0.058816260966267145, + "grad_norm": 0.7151674344713859, + "learning_rate": 1.1625615763546799e-05, + "loss": 0.5877068042755127, + "step": 119 + }, + { + "epoch": 0.059310515260101324, + "grad_norm": 0.7467125629300125, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.6140042543411255, + "step": 120 + }, + { + "epoch": 0.0598047695539355, + "grad_norm": 0.7531213899377466, + "learning_rate": 1.182266009852217e-05, + "loss": 0.5642052292823792, + "step": 121 + }, + { + "epoch": 0.060299023847769674, + "grad_norm": 0.7258097143889621, + "learning_rate": 1.1921182266009855e-05, + "loss": 0.5535261034965515, + "step": 122 + }, + { + "epoch": 0.06079327814160385, + "grad_norm": 0.6906824437380253, + "learning_rate": 1.201970443349754e-05, + "loss": 0.5202849507331848, + "step": 123 + }, + { + "epoch": 0.06128753243543803, + "grad_norm": 0.7290752273219125, + "learning_rate": 1.2118226600985224e-05, + "loss": 0.5626791715621948, + "step": 124 + }, + { + "epoch": 0.06178178672927221, + "grad_norm": 0.6770400510110369, + "learning_rate": 1.2216748768472909e-05, + "loss": 0.5416101217269897, + "step": 125 + }, + { + "epoch": 0.06227604102310639, + "grad_norm": 0.730080694043851, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.5683388710021973, + "step": 126 + }, + { + "epoch": 0.06277029531694056, + "grad_norm": 
0.7617011668537459, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.564468264579773, + "step": 127 + }, + { + "epoch": 0.06326454961077474, + "grad_norm": 0.7085057216007719, + "learning_rate": 1.2512315270935961e-05, + "loss": 0.5419844388961792, + "step": 128 + }, + { + "epoch": 0.06375880390460892, + "grad_norm": 0.7653624040034734, + "learning_rate": 1.2610837438423646e-05, + "loss": 0.51283860206604, + "step": 129 + }, + { + "epoch": 0.0642530581984431, + "grad_norm": 0.8138449595397697, + "learning_rate": 1.2709359605911331e-05, + "loss": 0.5807296633720398, + "step": 130 + }, + { + "epoch": 0.06474731249227728, + "grad_norm": 0.6723079879875923, + "learning_rate": 1.2807881773399016e-05, + "loss": 0.5277815461158752, + "step": 131 + }, + { + "epoch": 0.06524156678611145, + "grad_norm": 0.6681532618442926, + "learning_rate": 1.29064039408867e-05, + "loss": 0.5044680833816528, + "step": 132 + }, + { + "epoch": 0.06573582107994563, + "grad_norm": 0.753382083900827, + "learning_rate": 1.3004926108374385e-05, + "loss": 0.5412886738777161, + "step": 133 + }, + { + "epoch": 0.06623007537377981, + "grad_norm": 0.7168767227212489, + "learning_rate": 1.310344827586207e-05, + "loss": 0.5314532518386841, + "step": 134 + }, + { + "epoch": 0.06672432966761399, + "grad_norm": 0.8393067756176276, + "learning_rate": 1.3201970443349755e-05, + "loss": 0.5544138550758362, + "step": 135 + }, + { + "epoch": 0.06721858396144817, + "grad_norm": 0.7720251101355328, + "learning_rate": 1.330049261083744e-05, + "loss": 0.5745705366134644, + "step": 136 + }, + { + "epoch": 0.06771283825528235, + "grad_norm": 0.8433611027798503, + "learning_rate": 1.3399014778325124e-05, + "loss": 0.5361800789833069, + "step": 137 + }, + { + "epoch": 0.06820709254911653, + "grad_norm": 0.7945865329579561, + "learning_rate": 1.3497536945812807e-05, + "loss": 0.5878221392631531, + "step": 138 + }, + { + "epoch": 0.06870134684295069, + "grad_norm": 0.7847520309491554, + "learning_rate": 
1.3596059113300492e-05, + "loss": 0.5952787399291992, + "step": 139 + }, + { + "epoch": 0.06919560113678487, + "grad_norm": 0.7556944357281568, + "learning_rate": 1.369458128078818e-05, + "loss": 0.5334340929985046, + "step": 140 + }, + { + "epoch": 0.06968985543061905, + "grad_norm": 0.7730405260844581, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.5297533273696899, + "step": 141 + }, + { + "epoch": 0.07018410972445323, + "grad_norm": 0.7838373123609123, + "learning_rate": 1.3891625615763548e-05, + "loss": 0.5388105511665344, + "step": 142 + }, + { + "epoch": 0.0706783640182874, + "grad_norm": 0.6827867428906486, + "learning_rate": 1.3990147783251233e-05, + "loss": 0.484375536441803, + "step": 143 + }, + { + "epoch": 0.07117261831212159, + "grad_norm": 0.7377838543831393, + "learning_rate": 1.4088669950738918e-05, + "loss": 0.5395358800888062, + "step": 144 + }, + { + "epoch": 0.07166687260595576, + "grad_norm": 0.7024037339686016, + "learning_rate": 1.4187192118226602e-05, + "loss": 0.501459538936615, + "step": 145 + }, + { + "epoch": 0.07216112689978994, + "grad_norm": 0.7544878056630825, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.5390491485595703, + "step": 146 + }, + { + "epoch": 0.07265538119362412, + "grad_norm": 0.7358581376182646, + "learning_rate": 1.4384236453201972e-05, + "loss": 0.505649745464325, + "step": 147 + }, + { + "epoch": 0.0731496354874583, + "grad_norm": 0.791834759029257, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.5155121684074402, + "step": 148 + }, + { + "epoch": 0.07364388978129248, + "grad_norm": 0.9182625859668322, + "learning_rate": 1.4581280788177341e-05, + "loss": 0.5502114295959473, + "step": 149 + }, + { + "epoch": 0.07413814407512666, + "grad_norm": 0.7705513444985356, + "learning_rate": 1.4679802955665026e-05, + "loss": 0.5243497490882874, + "step": 150 + }, + { + "epoch": 0.07463239836896084, + "grad_norm": 0.7936247647794451, + "learning_rate": 1.4778325123152711e-05, + "loss": 
0.529721736907959, + "step": 151 + }, + { + "epoch": 0.075126652662795, + "grad_norm": 0.7493387955752852, + "learning_rate": 1.4876847290640396e-05, + "loss": 0.4721008241176605, + "step": 152 + }, + { + "epoch": 0.07562090695662918, + "grad_norm": 0.8448372107109295, + "learning_rate": 1.4975369458128079e-05, + "loss": 0.46029576659202576, + "step": 153 + }, + { + "epoch": 0.07611516125046336, + "grad_norm": 0.8666504632745452, + "learning_rate": 1.5073891625615764e-05, + "loss": 0.5151746273040771, + "step": 154 + }, + { + "epoch": 0.07660941554429754, + "grad_norm": 0.8234378506914858, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.4743254780769348, + "step": 155 + }, + { + "epoch": 0.07710366983813172, + "grad_norm": 0.7901189046711773, + "learning_rate": 1.5270935960591133e-05, + "loss": 0.5167561769485474, + "step": 156 + }, + { + "epoch": 0.0775979241319659, + "grad_norm": 0.7442599788530032, + "learning_rate": 1.536945812807882e-05, + "loss": 0.47482365369796753, + "step": 157 + }, + { + "epoch": 0.07809217842580007, + "grad_norm": 0.7472930500337165, + "learning_rate": 1.5467980295566506e-05, + "loss": 0.5088409781455994, + "step": 158 + }, + { + "epoch": 0.07858643271963425, + "grad_norm": 0.839637174922739, + "learning_rate": 1.556650246305419e-05, + "loss": 0.5264201164245605, + "step": 159 + }, + { + "epoch": 0.07908068701346843, + "grad_norm": 0.8043048232381864, + "learning_rate": 1.5665024630541875e-05, + "loss": 0.5475984811782837, + "step": 160 + }, + { + "epoch": 0.07957494130730261, + "grad_norm": 0.813963733997232, + "learning_rate": 1.576354679802956e-05, + "loss": 0.5652282238006592, + "step": 161 + }, + { + "epoch": 0.08006919560113679, + "grad_norm": 0.8257458665080726, + "learning_rate": 1.586206896551724e-05, + "loss": 0.5179979801177979, + "step": 162 + }, + { + "epoch": 0.08056344989497097, + "grad_norm": 0.7453513460678786, + "learning_rate": 1.5960591133004928e-05, + "loss": 0.4966253638267517, + "step": 163 + }, + { + 
"epoch": 0.08105770418880515, + "grad_norm": 0.7400908854625781, + "learning_rate": 1.605911330049261e-05, + "loss": 0.5216315388679504, + "step": 164 + }, + { + "epoch": 0.08155195848263931, + "grad_norm": 0.7974617542166776, + "learning_rate": 1.6157635467980298e-05, + "loss": 0.495576411485672, + "step": 165 + }, + { + "epoch": 0.08204621277647349, + "grad_norm": 0.7828217496299378, + "learning_rate": 1.625615763546798e-05, + "loss": 0.5101697444915771, + "step": 166 + }, + { + "epoch": 0.08254046707030767, + "grad_norm": 0.7891722656265441, + "learning_rate": 1.6354679802955667e-05, + "loss": 0.5438036918640137, + "step": 167 + }, + { + "epoch": 0.08303472136414185, + "grad_norm": 0.8062908900423786, + "learning_rate": 1.645320197044335e-05, + "loss": 0.5043500661849976, + "step": 168 + }, + { + "epoch": 0.08352897565797603, + "grad_norm": 0.8893145421032131, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.5129355788230896, + "step": 169 + }, + { + "epoch": 0.0840232299518102, + "grad_norm": 0.8344265538652059, + "learning_rate": 1.665024630541872e-05, + "loss": 0.48643916845321655, + "step": 170 + }, + { + "epoch": 0.08451748424564438, + "grad_norm": 0.9138503767586129, + "learning_rate": 1.6748768472906406e-05, + "loss": 0.5300272703170776, + "step": 171 + }, + { + "epoch": 0.08501173853947856, + "grad_norm": 0.9819214205489949, + "learning_rate": 1.684729064039409e-05, + "loss": 0.5321004390716553, + "step": 172 + }, + { + "epoch": 0.08550599283331274, + "grad_norm": 0.9555025734347583, + "learning_rate": 1.6945812807881776e-05, + "loss": 0.5066401958465576, + "step": 173 + }, + { + "epoch": 0.08600024712714692, + "grad_norm": 0.8139597552129452, + "learning_rate": 1.704433497536946e-05, + "loss": 0.48993563652038574, + "step": 174 + }, + { + "epoch": 0.0864945014209811, + "grad_norm": 0.8921248257221488, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.5468013882637024, + "step": 175 + }, + { + "epoch": 0.08698875571481528, + "grad_norm": 
0.8277628260630481, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5081865191459656, + "step": 176 + }, + { + "epoch": 0.08748301000864946, + "grad_norm": 0.7727605442624492, + "learning_rate": 1.7339901477832515e-05, + "loss": 0.48374873399734497, + "step": 177 + }, + { + "epoch": 0.08797726430248362, + "grad_norm": 0.7716185332367417, + "learning_rate": 1.7438423645320198e-05, + "loss": 0.4929465651512146, + "step": 178 + }, + { + "epoch": 0.0884715185963178, + "grad_norm": 0.7369259534742475, + "learning_rate": 1.7536945812807884e-05, + "loss": 0.49666428565979004, + "step": 179 + }, + { + "epoch": 0.08896577289015198, + "grad_norm": 0.9095846029993176, + "learning_rate": 1.7635467980295567e-05, + "loss": 0.5705476403236389, + "step": 180 + }, + { + "epoch": 0.08946002718398616, + "grad_norm": 0.8153458294604309, + "learning_rate": 1.7733990147783254e-05, + "loss": 0.5466605424880981, + "step": 181 + }, + { + "epoch": 0.08995428147782034, + "grad_norm": 0.7908211366510465, + "learning_rate": 1.7832512315270937e-05, + "loss": 0.47837337851524353, + "step": 182 + }, + { + "epoch": 0.09044853577165451, + "grad_norm": 0.8050205335034676, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.5370041131973267, + "step": 183 + }, + { + "epoch": 0.0909427900654887, + "grad_norm": 0.8315453873696782, + "learning_rate": 1.8029556650246306e-05, + "loss": 0.540340006351471, + "step": 184 + }, + { + "epoch": 0.09143704435932287, + "grad_norm": 0.7864886396514408, + "learning_rate": 1.8128078817733993e-05, + "loss": 0.5165396928787231, + "step": 185 + }, + { + "epoch": 0.09193129865315705, + "grad_norm": 1.0212742677335798, + "learning_rate": 1.8226600985221676e-05, + "loss": 0.5391616821289062, + "step": 186 + }, + { + "epoch": 0.09242555294699123, + "grad_norm": 0.8362655612683817, + "learning_rate": 1.8325123152709362e-05, + "loss": 0.472774475812912, + "step": 187 + }, + { + "epoch": 0.09291980724082541, + "grad_norm": 0.7994913228950927, + "learning_rate": 
1.8423645320197045e-05, + "loss": 0.5079161524772644, + "step": 188 + }, + { + "epoch": 0.09341406153465959, + "grad_norm": 0.7908069143027292, + "learning_rate": 1.852216748768473e-05, + "loss": 0.4909520149230957, + "step": 189 + }, + { + "epoch": 0.09390831582849377, + "grad_norm": 0.8204263481704893, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.5214540362358093, + "step": 190 + }, + { + "epoch": 0.09440257012232794, + "grad_norm": 1.0097310819423937, + "learning_rate": 1.8719211822660098e-05, + "loss": 0.4820341467857361, + "step": 191 + }, + { + "epoch": 0.09489682441616211, + "grad_norm": 0.7986122947719724, + "learning_rate": 1.8817733990147784e-05, + "loss": 0.5094855427742004, + "step": 192 + }, + { + "epoch": 0.09539107870999629, + "grad_norm": 0.8104059351445748, + "learning_rate": 1.8916256157635468e-05, + "loss": 0.47840312123298645, + "step": 193 + }, + { + "epoch": 0.09588533300383047, + "grad_norm": 0.8556791067143968, + "learning_rate": 1.9014778325123154e-05, + "loss": 0.5368070602416992, + "step": 194 + }, + { + "epoch": 0.09637958729766465, + "grad_norm": 0.8413108625552047, + "learning_rate": 1.911330049261084e-05, + "loss": 0.493880033493042, + "step": 195 + }, + { + "epoch": 0.09687384159149882, + "grad_norm": 0.8344269563446816, + "learning_rate": 1.9211822660098524e-05, + "loss": 0.5052261352539062, + "step": 196 + }, + { + "epoch": 0.097368095885333, + "grad_norm": 0.8488100596559239, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4817495346069336, + "step": 197 + }, + { + "epoch": 0.09786235017916718, + "grad_norm": 0.8835550004433761, + "learning_rate": 1.9408866995073893e-05, + "loss": 0.530259370803833, + "step": 198 + }, + { + "epoch": 0.09835660447300136, + "grad_norm": 0.8634602606490965, + "learning_rate": 1.950738916256158e-05, + "loss": 0.4984540045261383, + "step": 199 + }, + { + "epoch": 0.09885085876683554, + "grad_norm": 0.8655848178642821, + "learning_rate": 1.9605911330049263e-05, + "loss": 
0.5472708940505981, + "step": 200 + }, + { + "epoch": 0.09934511306066972, + "grad_norm": 0.8520053240792014, + "learning_rate": 1.970443349753695e-05, + "loss": 0.5394926071166992, + "step": 201 + }, + { + "epoch": 0.0998393673545039, + "grad_norm": 0.9089636816290306, + "learning_rate": 1.9802955665024632e-05, + "loss": 0.5299160480499268, + "step": 202 + }, + { + "epoch": 0.10033362164833808, + "grad_norm": 0.9396000630272938, + "learning_rate": 1.990147783251232e-05, + "loss": 0.506400465965271, + "step": 203 + }, + { + "epoch": 0.10082787594217225, + "grad_norm": 0.7711226267847403, + "learning_rate": 2e-05, + "loss": 0.47956231236457825, + "step": 204 + }, + { + "epoch": 0.10132213023600642, + "grad_norm": 0.8971065288988803, + "learning_rate": 1.9999996662071442e-05, + "loss": 0.48805660009384155, + "step": 205 + }, + { + "epoch": 0.1018163845298406, + "grad_norm": 0.8419748393313904, + "learning_rate": 1.9999986648287996e-05, + "loss": 0.46014025807380676, + "step": 206 + }, + { + "epoch": 0.10231063882367478, + "grad_norm": 0.8667704651728929, + "learning_rate": 1.9999969958656345e-05, + "loss": 0.4654610753059387, + "step": 207 + }, + { + "epoch": 0.10280489311750896, + "grad_norm": 0.8143872307343123, + "learning_rate": 1.999994659318763e-05, + "loss": 0.47037336230278015, + "step": 208 + }, + { + "epoch": 0.10329914741134313, + "grad_norm": 0.8238466130965688, + "learning_rate": 1.999991655189745e-05, + "loss": 0.4853154718875885, + "step": 209 + }, + { + "epoch": 0.10379340170517731, + "grad_norm": 0.8233043672230826, + "learning_rate": 1.9999879834805865e-05, + "loss": 0.4918109178543091, + "step": 210 + }, + { + "epoch": 0.10428765599901149, + "grad_norm": 0.787297319281164, + "learning_rate": 1.999983644193738e-05, + "loss": 0.5136955380439758, + "step": 211 + }, + { + "epoch": 0.10478191029284567, + "grad_norm": 0.8895124065919626, + "learning_rate": 1.9999786373320972e-05, + "loss": 0.5145115852355957, + "step": 212 + }, + { + "epoch": 
0.10527616458667985, + "grad_norm": 0.8153315460424436, + "learning_rate": 1.9999729628990058e-05, + "loss": 0.4624764025211334, + "step": 213 + }, + { + "epoch": 0.10577041888051403, + "grad_norm": 0.7949012412003572, + "learning_rate": 1.9999666208982518e-05, + "loss": 0.4599718749523163, + "step": 214 + }, + { + "epoch": 0.1062646731743482, + "grad_norm": 0.8110981138692489, + "learning_rate": 1.99995961133407e-05, + "loss": 0.4642864465713501, + "step": 215 + }, + { + "epoch": 0.10675892746818239, + "grad_norm": 0.7785663242974379, + "learning_rate": 1.9999519342111392e-05, + "loss": 0.4756677448749542, + "step": 216 + }, + { + "epoch": 0.10725318176201656, + "grad_norm": 0.8781173550322721, + "learning_rate": 1.9999435895345846e-05, + "loss": 0.4982803463935852, + "step": 217 + }, + { + "epoch": 0.10774743605585073, + "grad_norm": 0.905677346569408, + "learning_rate": 1.999934577309977e-05, + "loss": 0.5189295411109924, + "step": 218 + }, + { + "epoch": 0.10824169034968491, + "grad_norm": 0.8870093356565885, + "learning_rate": 1.999924897543333e-05, + "loss": 0.5077873468399048, + "step": 219 + }, + { + "epoch": 0.10873594464351909, + "grad_norm": 0.9164316488089079, + "learning_rate": 1.9999145502411148e-05, + "loss": 0.5510451793670654, + "step": 220 + }, + { + "epoch": 0.10923019893735327, + "grad_norm": 0.850640343977404, + "learning_rate": 1.9999035354102298e-05, + "loss": 0.44604551792144775, + "step": 221 + }, + { + "epoch": 0.10972445323118744, + "grad_norm": 0.7739778425864705, + "learning_rate": 1.9998918530580315e-05, + "loss": 0.42567160725593567, + "step": 222 + }, + { + "epoch": 0.11021870752502162, + "grad_norm": 0.8699648367810445, + "learning_rate": 1.9998795031923186e-05, + "loss": 0.4622190594673157, + "step": 223 + }, + { + "epoch": 0.1107129618188558, + "grad_norm": 0.8261943707290175, + "learning_rate": 1.999866485821336e-05, + "loss": 0.5023611783981323, + "step": 224 + }, + { + "epoch": 0.11120721611268998, + "grad_norm": 
0.769204860463621, + "learning_rate": 1.9998528009537735e-05, + "loss": 0.451701819896698, + "step": 225 + }, + { + "epoch": 0.11170147040652416, + "grad_norm": 0.9053438794448195, + "learning_rate": 1.9998384485987675e-05, + "loss": 0.48493725061416626, + "step": 226 + }, + { + "epoch": 0.11219572470035834, + "grad_norm": 0.7780216873284675, + "learning_rate": 1.9998234287658996e-05, + "loss": 0.45377853512763977, + "step": 227 + }, + { + "epoch": 0.11268997899419252, + "grad_norm": 0.9129521331875277, + "learning_rate": 1.9998077414651957e-05, + "loss": 0.48963701725006104, + "step": 228 + }, + { + "epoch": 0.1131842332880267, + "grad_norm": 0.8500208947168179, + "learning_rate": 1.9997913867071296e-05, + "loss": 0.47935402393341064, + "step": 229 + }, + { + "epoch": 0.11367848758186087, + "grad_norm": 0.8984825507205957, + "learning_rate": 1.999774364502619e-05, + "loss": 0.46203523874282837, + "step": 230 + }, + { + "epoch": 0.11417274187569504, + "grad_norm": 0.8695917880315948, + "learning_rate": 1.9997566748630274e-05, + "loss": 0.4411412179470062, + "step": 231 + }, + { + "epoch": 0.11466699616952922, + "grad_norm": 0.9063292151670944, + "learning_rate": 1.9997383178001646e-05, + "loss": 0.44424787163734436, + "step": 232 + }, + { + "epoch": 0.1151612504633634, + "grad_norm": 0.9239108187837685, + "learning_rate": 1.9997192933262853e-05, + "loss": 0.4862042963504791, + "step": 233 + }, + { + "epoch": 0.11565550475719757, + "grad_norm": 0.9583721120887143, + "learning_rate": 1.99969960145409e-05, + "loss": 0.49599340558052063, + "step": 234 + }, + { + "epoch": 0.11614975905103175, + "grad_norm": 0.8373453660412895, + "learning_rate": 1.999679242196725e-05, + "loss": 0.49702027440071106, + "step": 235 + }, + { + "epoch": 0.11664401334486593, + "grad_norm": 0.9122480348696357, + "learning_rate": 1.9996582155677813e-05, + "loss": 0.520037829875946, + "step": 236 + }, + { + "epoch": 0.11713826763870011, + "grad_norm": 0.8765545420336399, + "learning_rate": 
1.999636521581296e-05, + "loss": 0.4571160674095154, + "step": 237 + }, + { + "epoch": 0.11763252193253429, + "grad_norm": 0.8739431997449725, + "learning_rate": 1.9996141602517526e-05, + "loss": 0.45602840185165405, + "step": 238 + }, + { + "epoch": 0.11812677622636847, + "grad_norm": 0.8737753030098584, + "learning_rate": 1.999591131594078e-05, + "loss": 0.4909728169441223, + "step": 239 + }, + { + "epoch": 0.11862103052020265, + "grad_norm": 0.9637438681008479, + "learning_rate": 1.9995674356236468e-05, + "loss": 0.47716090083122253, + "step": 240 + }, + { + "epoch": 0.11911528481403683, + "grad_norm": 0.8781513787464966, + "learning_rate": 1.9995430723562774e-05, + "loss": 0.4449527859687805, + "step": 241 + }, + { + "epoch": 0.119609539107871, + "grad_norm": 0.9278951723441426, + "learning_rate": 1.9995180418082347e-05, + "loss": 0.49069035053253174, + "step": 242 + }, + { + "epoch": 0.12010379340170518, + "grad_norm": 0.8082383806465664, + "learning_rate": 1.9994923439962286e-05, + "loss": 0.506738543510437, + "step": 243 + }, + { + "epoch": 0.12059804769553935, + "grad_norm": 0.7256243644120642, + "learning_rate": 1.9994659789374145e-05, + "loss": 0.38516658544540405, + "step": 244 + }, + { + "epoch": 0.12109230198937353, + "grad_norm": 1.120005864402108, + "learning_rate": 1.9994389466493942e-05, + "loss": 0.49539780616760254, + "step": 245 + }, + { + "epoch": 0.1215865562832077, + "grad_norm": 0.8099291045850996, + "learning_rate": 1.999411247150213e-05, + "loss": 0.4400706887245178, + "step": 246 + }, + { + "epoch": 0.12208081057704188, + "grad_norm": 1.033732324753182, + "learning_rate": 1.9993828804583625e-05, + "loss": 0.48815736174583435, + "step": 247 + }, + { + "epoch": 0.12257506487087606, + "grad_norm": 0.8506340248073136, + "learning_rate": 1.999353846592781e-05, + "loss": 0.42744773626327515, + "step": 248 + }, + { + "epoch": 0.12306931916471024, + "grad_norm": 0.8847437809130215, + "learning_rate": 1.9993241455728505e-05, + "loss": 
0.4370969235897064, + "step": 249 + }, + { + "epoch": 0.12356357345854442, + "grad_norm": 0.8643380888364789, + "learning_rate": 1.9992937774183988e-05, + "loss": 0.4803960621356964, + "step": 250 + }, + { + "epoch": 0.1240578277523786, + "grad_norm": 0.8986867692232635, + "learning_rate": 1.9992627421496994e-05, + "loss": 0.4614640474319458, + "step": 251 + }, + { + "epoch": 0.12455208204621278, + "grad_norm": 0.819634526245566, + "learning_rate": 1.9992310397874715e-05, + "loss": 0.46626490354537964, + "step": 252 + }, + { + "epoch": 0.12504633634004694, + "grad_norm": 0.8614062439986471, + "learning_rate": 1.9991986703528784e-05, + "loss": 0.4812886416912079, + "step": 253 + }, + { + "epoch": 0.12554059063388112, + "grad_norm": 0.782352455662906, + "learning_rate": 1.99916563386753e-05, + "loss": 0.45037686824798584, + "step": 254 + }, + { + "epoch": 0.1260348449277153, + "grad_norm": 0.8735972282090627, + "learning_rate": 1.9991319303534804e-05, + "loss": 0.48492124676704407, + "step": 255 + }, + { + "epoch": 0.12652909922154948, + "grad_norm": 0.9123971905878313, + "learning_rate": 1.9990975598332304e-05, + "loss": 0.48825496435165405, + "step": 256 + }, + { + "epoch": 0.12702335351538366, + "grad_norm": 0.9350748088966393, + "learning_rate": 1.9990625223297244e-05, + "loss": 0.4836634695529938, + "step": 257 + }, + { + "epoch": 0.12751760780921784, + "grad_norm": 0.8091067369882244, + "learning_rate": 1.9990268178663538e-05, + "loss": 0.4632943272590637, + "step": 258 + }, + { + "epoch": 0.12801186210305202, + "grad_norm": 0.8933963237824735, + "learning_rate": 1.9989904464669533e-05, + "loss": 0.4601137042045593, + "step": 259 + }, + { + "epoch": 0.1285061163968862, + "grad_norm": 0.956219889400008, + "learning_rate": 1.998953408155805e-05, + "loss": 0.4390139579772949, + "step": 260 + }, + { + "epoch": 0.12900037069072037, + "grad_norm": 0.8209256250218969, + "learning_rate": 1.9989157029576348e-05, + "loss": 0.45749080181121826, + "step": 261 + }, + { + 
"epoch": 0.12949462498455455, + "grad_norm": 0.8687280720196128, + "learning_rate": 1.998877330897614e-05, + "loss": 0.4490616023540497, + "step": 262 + }, + { + "epoch": 0.12998887927838873, + "grad_norm": 0.8048623785766325, + "learning_rate": 1.998838292001359e-05, + "loss": 0.4819987714290619, + "step": 263 + }, + { + "epoch": 0.1304831335722229, + "grad_norm": 0.8512266303867803, + "learning_rate": 1.9987985862949325e-05, + "loss": 0.4448384940624237, + "step": 264 + }, + { + "epoch": 0.1309773878660571, + "grad_norm": 0.8699526878628875, + "learning_rate": 1.9987582138048405e-05, + "loss": 0.4574149549007416, + "step": 265 + }, + { + "epoch": 0.13147164215989127, + "grad_norm": 0.8239086741829158, + "learning_rate": 1.9987171745580353e-05, + "loss": 0.4765186607837677, + "step": 266 + }, + { + "epoch": 0.13196589645372545, + "grad_norm": 0.8859727328667625, + "learning_rate": 1.998675468581915e-05, + "loss": 0.4900081753730774, + "step": 267 + }, + { + "epoch": 0.13246015074755962, + "grad_norm": 0.8200731674424109, + "learning_rate": 1.9986330959043206e-05, + "loss": 0.433933287858963, + "step": 268 + }, + { + "epoch": 0.1329544050413938, + "grad_norm": 0.8424887851968712, + "learning_rate": 1.9985900565535403e-05, + "loss": 0.452491819858551, + "step": 269 + }, + { + "epoch": 0.13344865933522798, + "grad_norm": 0.8454499255279871, + "learning_rate": 1.9985463505583062e-05, + "loss": 0.4583294987678528, + "step": 270 + }, + { + "epoch": 0.13394291362906216, + "grad_norm": 0.7993545503780815, + "learning_rate": 1.9985019779477958e-05, + "loss": 0.43183961510658264, + "step": 271 + }, + { + "epoch": 0.13443716792289634, + "grad_norm": 0.8548370246393396, + "learning_rate": 1.998456938751632e-05, + "loss": 0.48075324296951294, + "step": 272 + }, + { + "epoch": 0.13493142221673052, + "grad_norm": 0.9002412472414919, + "learning_rate": 1.9984112329998825e-05, + "loss": 0.5131007432937622, + "step": 273 + }, + { + "epoch": 0.1354256765105647, + "grad_norm": 
0.9730858409317547, + "learning_rate": 1.998364860723059e-05, + "loss": 0.4841446876525879, + "step": 274 + }, + { + "epoch": 0.13591993080439888, + "grad_norm": 0.845168898875427, + "learning_rate": 1.9983178219521194e-05, + "loss": 0.5001078248023987, + "step": 275 + }, + { + "epoch": 0.13641418509823305, + "grad_norm": 0.9216453803321015, + "learning_rate": 1.998270116718466e-05, + "loss": 0.44851893186569214, + "step": 276 + }, + { + "epoch": 0.1369084393920672, + "grad_norm": 0.8496437780068066, + "learning_rate": 1.9982217450539464e-05, + "loss": 0.4635714888572693, + "step": 277 + }, + { + "epoch": 0.13740269368590138, + "grad_norm": 0.8697167139912243, + "learning_rate": 1.9981727069908525e-05, + "loss": 0.4171838164329529, + "step": 278 + }, + { + "epoch": 0.13789694797973556, + "grad_norm": 0.9173222191020198, + "learning_rate": 1.9981230025619216e-05, + "loss": 0.4819942116737366, + "step": 279 + }, + { + "epoch": 0.13839120227356974, + "grad_norm": 0.965585018194969, + "learning_rate": 1.998072631800336e-05, + "loss": 0.47878971695899963, + "step": 280 + }, + { + "epoch": 0.13888545656740392, + "grad_norm": 0.8354999533998939, + "learning_rate": 1.9980215947397217e-05, + "loss": 0.4436519145965576, + "step": 281 + }, + { + "epoch": 0.1393797108612381, + "grad_norm": 0.9615471937507843, + "learning_rate": 1.9979698914141507e-05, + "loss": 0.4633050262928009, + "step": 282 + }, + { + "epoch": 0.13987396515507228, + "grad_norm": 0.8419828093645744, + "learning_rate": 1.9979175218581397e-05, + "loss": 0.4264826774597168, + "step": 283 + }, + { + "epoch": 0.14036821944890646, + "grad_norm": 0.9397240311894202, + "learning_rate": 1.9978644861066493e-05, + "loss": 0.47763916850090027, + "step": 284 + }, + { + "epoch": 0.14086247374274063, + "grad_norm": 0.9621046785661004, + "learning_rate": 1.997810784195086e-05, + "loss": 0.44895434379577637, + "step": 285 + }, + { + "epoch": 0.1413567280365748, + "grad_norm": 0.9045420673708359, + "learning_rate": 
1.9977564161593e-05, + "loss": 0.4287600517272949, + "step": 286 + }, + { + "epoch": 0.141850982330409, + "grad_norm": 0.9070406248365095, + "learning_rate": 1.997701382035587e-05, + "loss": 0.44175297021865845, + "step": 287 + }, + { + "epoch": 0.14234523662424317, + "grad_norm": 0.9409958894859969, + "learning_rate": 1.9976456818606868e-05, + "loss": 0.4393232464790344, + "step": 288 + }, + { + "epoch": 0.14283949091807735, + "grad_norm": 0.9574764348211552, + "learning_rate": 1.9975893156717836e-05, + "loss": 0.4600023329257965, + "step": 289 + }, + { + "epoch": 0.14333374521191153, + "grad_norm": 0.9582932704552442, + "learning_rate": 1.9975322835065075e-05, + "loss": 0.4819300174713135, + "step": 290 + }, + { + "epoch": 0.1438279995057457, + "grad_norm": 0.8798665685233671, + "learning_rate": 1.9974745854029318e-05, + "loss": 0.4391498267650604, + "step": 291 + }, + { + "epoch": 0.14432225379957989, + "grad_norm": 0.8278978827145046, + "learning_rate": 1.9974162213995748e-05, + "loss": 0.43435904383659363, + "step": 292 + }, + { + "epoch": 0.14481650809341406, + "grad_norm": 0.8555919001416697, + "learning_rate": 1.9973571915354e-05, + "loss": 0.43575727939605713, + "step": 293 + }, + { + "epoch": 0.14531076238724824, + "grad_norm": 0.847472972308698, + "learning_rate": 1.9972974958498145e-05, + "loss": 0.39998459815979004, + "step": 294 + }, + { + "epoch": 0.14580501668108242, + "grad_norm": 0.9068432330089449, + "learning_rate": 1.9972371343826705e-05, + "loss": 0.4620361030101776, + "step": 295 + }, + { + "epoch": 0.1462992709749166, + "grad_norm": 0.9496965104492539, + "learning_rate": 1.9971761071742644e-05, + "loss": 0.5172264575958252, + "step": 296 + }, + { + "epoch": 0.14679352526875078, + "grad_norm": 0.9234160870013586, + "learning_rate": 1.997114414265337e-05, + "loss": 0.4685489535331726, + "step": 297 + }, + { + "epoch": 0.14728777956258496, + "grad_norm": 0.8830728533856737, + "learning_rate": 1.9970520556970735e-05, + "loss": 
0.4346499741077423, + "step": 298 + }, + { + "epoch": 0.14778203385641914, + "grad_norm": 0.8462127222831192, + "learning_rate": 1.996989031511104e-05, + "loss": 0.4051141142845154, + "step": 299 + }, + { + "epoch": 0.14827628815025332, + "grad_norm": 1.5751283315817302, + "learning_rate": 1.996925341749502e-05, + "loss": 0.4862591028213501, + "step": 300 + }, + { + "epoch": 0.1487705424440875, + "grad_norm": 0.9475006076143342, + "learning_rate": 1.996860986454787e-05, + "loss": 0.44075754284858704, + "step": 301 + }, + { + "epoch": 0.14926479673792167, + "grad_norm": 0.8707373783945862, + "learning_rate": 1.99679596566992e-05, + "loss": 0.44321805238723755, + "step": 302 + }, + { + "epoch": 0.14975905103175585, + "grad_norm": 0.8195768056986794, + "learning_rate": 1.996730279438309e-05, + "loss": 0.4468157887458801, + "step": 303 + }, + { + "epoch": 0.15025330532559, + "grad_norm": 0.9918503423974457, + "learning_rate": 1.996663927803805e-05, + "loss": 0.48698270320892334, + "step": 304 + }, + { + "epoch": 0.15074755961942418, + "grad_norm": 0.9116215117394889, + "learning_rate": 1.9965969108107032e-05, + "loss": 0.41898253560066223, + "step": 305 + }, + { + "epoch": 0.15124181391325836, + "grad_norm": 0.9221438157249551, + "learning_rate": 1.9965292285037437e-05, + "loss": 0.4827130436897278, + "step": 306 + }, + { + "epoch": 0.15173606820709254, + "grad_norm": 0.8314057300557679, + "learning_rate": 1.99646088092811e-05, + "loss": 0.4219037592411041, + "step": 307 + }, + { + "epoch": 0.15223032250092672, + "grad_norm": 0.8392045773293594, + "learning_rate": 1.9963918681294298e-05, + "loss": 0.4431123733520508, + "step": 308 + }, + { + "epoch": 0.1527245767947609, + "grad_norm": 0.8500815118931239, + "learning_rate": 1.996322190153775e-05, + "loss": 0.4161941409111023, + "step": 309 + }, + { + "epoch": 0.15321883108859508, + "grad_norm": 0.9107651666369411, + "learning_rate": 1.9962518470476617e-05, + "loss": 0.4774768650531769, + "step": 310 + }, + { + "epoch": 
0.15371308538242925, + "grad_norm": 0.8037347887475985, + "learning_rate": 1.9961808388580503e-05, + "loss": 0.4196036159992218, + "step": 311 + }, + { + "epoch": 0.15420733967626343, + "grad_norm": 1.0067362464519019, + "learning_rate": 1.996109165632344e-05, + "loss": 0.44241398572921753, + "step": 312 + }, + { + "epoch": 0.1547015939700976, + "grad_norm": 0.888150506782497, + "learning_rate": 1.996036827418392e-05, + "loss": 0.47662627696990967, + "step": 313 + }, + { + "epoch": 0.1551958482639318, + "grad_norm": 0.8458159023673953, + "learning_rate": 1.9959638242644855e-05, + "loss": 0.4241487979888916, + "step": 314 + }, + { + "epoch": 0.15569010255776597, + "grad_norm": 0.9355978957071136, + "learning_rate": 1.9958901562193605e-05, + "loss": 0.45686113834381104, + "step": 315 + }, + { + "epoch": 0.15618435685160015, + "grad_norm": 0.944155507976385, + "learning_rate": 1.9958158233321968e-05, + "loss": 0.4154825806617737, + "step": 316 + }, + { + "epoch": 0.15667861114543433, + "grad_norm": 0.9827195710672626, + "learning_rate": 1.9957408256526176e-05, + "loss": 0.4705435037612915, + "step": 317 + }, + { + "epoch": 0.1571728654392685, + "grad_norm": 0.9880074034620054, + "learning_rate": 1.9956651632306908e-05, + "loss": 0.4367898404598236, + "step": 318 + }, + { + "epoch": 0.15766711973310268, + "grad_norm": 0.9294773909083144, + "learning_rate": 1.9955888361169272e-05, + "loss": 0.4668901264667511, + "step": 319 + }, + { + "epoch": 0.15816137402693686, + "grad_norm": 0.9543525396859661, + "learning_rate": 1.995511844362282e-05, + "loss": 0.46429356932640076, + "step": 320 + }, + { + "epoch": 0.15865562832077104, + "grad_norm": 0.9206239653453478, + "learning_rate": 1.9954341880181536e-05, + "loss": 0.4582952857017517, + "step": 321 + }, + { + "epoch": 0.15914988261460522, + "grad_norm": 0.9460762127599929, + "learning_rate": 1.9953558671363843e-05, + "loss": 0.45110762119293213, + "step": 322 + }, + { + "epoch": 0.1596441369084394, + "grad_norm": 
0.9441078381056233, + "learning_rate": 1.99527688176926e-05, + "loss": 0.4049065113067627, + "step": 323 + }, + { + "epoch": 0.16013839120227358, + "grad_norm": 0.8033040053333058, + "learning_rate": 1.9951972319695105e-05, + "loss": 0.40884825587272644, + "step": 324 + }, + { + "epoch": 0.16063264549610776, + "grad_norm": 0.902465277703788, + "learning_rate": 1.9951169177903084e-05, + "loss": 0.4416786730289459, + "step": 325 + }, + { + "epoch": 0.16112689978994194, + "grad_norm": 0.8396124025463547, + "learning_rate": 1.9950359392852704e-05, + "loss": 0.4318765103816986, + "step": 326 + }, + { + "epoch": 0.16162115408377611, + "grad_norm": 0.9197188335811614, + "learning_rate": 1.9949542965084564e-05, + "loss": 0.4415965974330902, + "step": 327 + }, + { + "epoch": 0.1621154083776103, + "grad_norm": 0.9816748337776936, + "learning_rate": 1.9948719895143703e-05, + "loss": 0.4816298186779022, + "step": 328 + }, + { + "epoch": 0.16260966267144447, + "grad_norm": 0.8960734361029558, + "learning_rate": 1.9947890183579594e-05, + "loss": 0.4329088032245636, + "step": 329 + }, + { + "epoch": 0.16310391696527862, + "grad_norm": 0.9960918612087606, + "learning_rate": 1.9947053830946134e-05, + "loss": 0.43193015456199646, + "step": 330 + }, + { + "epoch": 0.1635981712591128, + "grad_norm": 0.9310501291263382, + "learning_rate": 1.994621083780166e-05, + "loss": 0.48738086223602295, + "step": 331 + }, + { + "epoch": 0.16409242555294698, + "grad_norm": 0.9523291617618251, + "learning_rate": 1.9945361204708948e-05, + "loss": 0.4707815647125244, + "step": 332 + }, + { + "epoch": 0.16458667984678116, + "grad_norm": 0.8438149141988297, + "learning_rate": 1.9944504932235198e-05, + "loss": 0.4190637469291687, + "step": 333 + }, + { + "epoch": 0.16508093414061534, + "grad_norm": 0.9348901251563362, + "learning_rate": 1.9943642020952042e-05, + "loss": 0.45955735445022583, + "step": 334 + }, + { + "epoch": 0.16557518843444952, + "grad_norm": 0.9334033255095994, + "learning_rate": 
1.9942772471435555e-05, + "loss": 0.4675702750682831, + "step": 335 + }, + { + "epoch": 0.1660694427282837, + "grad_norm": 0.9694338385909206, + "learning_rate": 1.9941896284266224e-05, + "loss": 0.42571327090263367, + "step": 336 + }, + { + "epoch": 0.16656369702211787, + "grad_norm": 0.827954024094364, + "learning_rate": 1.994101346002899e-05, + "loss": 0.4341443181037903, + "step": 337 + }, + { + "epoch": 0.16705795131595205, + "grad_norm": 0.9227161087353433, + "learning_rate": 1.9940123999313214e-05, + "loss": 0.4473035931587219, + "step": 338 + }, + { + "epoch": 0.16755220560978623, + "grad_norm": 0.9514215023205275, + "learning_rate": 1.9939227902712676e-05, + "loss": 0.4692152142524719, + "step": 339 + }, + { + "epoch": 0.1680464599036204, + "grad_norm": 0.902462533797338, + "learning_rate": 1.9938325170825607e-05, + "loss": 0.4169067442417145, + "step": 340 + }, + { + "epoch": 0.1685407141974546, + "grad_norm": 0.8958693793994358, + "learning_rate": 1.9937415804254657e-05, + "loss": 0.451092928647995, + "step": 341 + }, + { + "epoch": 0.16903496849128877, + "grad_norm": 0.9439820250269497, + "learning_rate": 1.99364998036069e-05, + "loss": 0.39640212059020996, + "step": 342 + }, + { + "epoch": 0.16952922278512295, + "grad_norm": 0.9953253959869931, + "learning_rate": 1.9935577169493854e-05, + "loss": 0.46396374702453613, + "step": 343 + }, + { + "epoch": 0.17002347707895712, + "grad_norm": 0.940542166338043, + "learning_rate": 1.9934647902531453e-05, + "loss": 0.4343748390674591, + "step": 344 + }, + { + "epoch": 0.1705177313727913, + "grad_norm": 0.8926095624124082, + "learning_rate": 1.9933712003340056e-05, + "loss": 0.4353589713573456, + "step": 345 + }, + { + "epoch": 0.17101198566662548, + "grad_norm": 0.981244679678695, + "learning_rate": 1.9932769472544464e-05, + "loss": 0.4423677623271942, + "step": 346 + }, + { + "epoch": 0.17150623996045966, + "grad_norm": 0.9632090771111401, + "learning_rate": 1.9931820310773894e-05, + "loss": 
0.4382045865058899, + "step": 347 + }, + { + "epoch": 0.17200049425429384, + "grad_norm": 0.9042153187184925, + "learning_rate": 1.993086451866199e-05, + "loss": 0.3966183066368103, + "step": 348 + }, + { + "epoch": 0.17249474854812802, + "grad_norm": 0.9998736444681166, + "learning_rate": 1.9929902096846833e-05, + "loss": 0.48624011874198914, + "step": 349 + }, + { + "epoch": 0.1729890028419622, + "grad_norm": 0.9399569652966117, + "learning_rate": 1.9928933045970913e-05, + "loss": 0.4442569315433502, + "step": 350 + }, + { + "epoch": 0.17348325713579638, + "grad_norm": 0.9204808269523502, + "learning_rate": 1.992795736668116e-05, + "loss": 0.42499929666519165, + "step": 351 + }, + { + "epoch": 0.17397751142963055, + "grad_norm": 0.9507435140290256, + "learning_rate": 1.9926975059628923e-05, + "loss": 0.4230741858482361, + "step": 352 + }, + { + "epoch": 0.17447176572346473, + "grad_norm": 0.9092303670359448, + "learning_rate": 1.9925986125469974e-05, + "loss": 0.4273882806301117, + "step": 353 + }, + { + "epoch": 0.1749660200172989, + "grad_norm": 0.9603670891238569, + "learning_rate": 1.9924990564864513e-05, + "loss": 0.45237618684768677, + "step": 354 + }, + { + "epoch": 0.1754602743111331, + "grad_norm": 0.8737901526941092, + "learning_rate": 1.9923988378477165e-05, + "loss": 0.4115524888038635, + "step": 355 + }, + { + "epoch": 0.17595452860496724, + "grad_norm": 0.8886450314145863, + "learning_rate": 1.9922979566976968e-05, + "loss": 0.4476633071899414, + "step": 356 + }, + { + "epoch": 0.17644878289880142, + "grad_norm": 1.155944411883778, + "learning_rate": 1.9921964131037398e-05, + "loss": 0.44930100440979004, + "step": 357 + }, + { + "epoch": 0.1769430371926356, + "grad_norm": 1.0356351975379994, + "learning_rate": 1.9920942071336338e-05, + "loss": 0.4714374244213104, + "step": 358 + }, + { + "epoch": 0.17743729148646978, + "grad_norm": 0.9469405731486913, + "learning_rate": 1.9919913388556105e-05, + "loss": 0.47696003317832947, + "step": 359 + }, + { + 
"epoch": 0.17793154578030396, + "grad_norm": 0.9021123492009391, + "learning_rate": 1.9918878083383434e-05, + "loss": 0.44937074184417725, + "step": 360 + }, + { + "epoch": 0.17842580007413814, + "grad_norm": 0.9771832594876818, + "learning_rate": 1.9917836156509472e-05, + "loss": 0.44937658309936523, + "step": 361 + }, + { + "epoch": 0.17892005436797231, + "grad_norm": 0.8240548100976023, + "learning_rate": 1.9916787608629805e-05, + "loss": 0.42068418860435486, + "step": 362 + }, + { + "epoch": 0.1794143086618065, + "grad_norm": 0.9112160927316303, + "learning_rate": 1.9915732440444428e-05, + "loss": 0.3791036605834961, + "step": 363 + }, + { + "epoch": 0.17990856295564067, + "grad_norm": 0.8982890263422821, + "learning_rate": 1.991467065265775e-05, + "loss": 0.401694118976593, + "step": 364 + }, + { + "epoch": 0.18040281724947485, + "grad_norm": 0.9743587318559909, + "learning_rate": 1.9913602245978602e-05, + "loss": 0.44095057249069214, + "step": 365 + }, + { + "epoch": 0.18089707154330903, + "grad_norm": 1.0125028049881057, + "learning_rate": 1.9912527221120248e-05, + "loss": 0.435880184173584, + "step": 366 + }, + { + "epoch": 0.1813913258371432, + "grad_norm": 0.9329716691545672, + "learning_rate": 1.991144557880035e-05, + "loss": 0.4147350490093231, + "step": 367 + }, + { + "epoch": 0.1818855801309774, + "grad_norm": 1.0077861725089856, + "learning_rate": 1.9910357319741006e-05, + "loss": 0.4191502630710602, + "step": 368 + }, + { + "epoch": 0.18237983442481157, + "grad_norm": 0.9334667001994715, + "learning_rate": 1.9909262444668715e-05, + "loss": 0.41988956928253174, + "step": 369 + }, + { + "epoch": 0.18287408871864574, + "grad_norm": 1.0279430559635638, + "learning_rate": 1.99081609543144e-05, + "loss": 0.47451251745224, + "step": 370 + }, + { + "epoch": 0.18336834301247992, + "grad_norm": 0.9591522165165333, + "learning_rate": 1.9907052849413408e-05, + "loss": 0.44665899872779846, + "step": 371 + }, + { + "epoch": 0.1838625973063141, + "grad_norm": 
1.0147189696208934, + "learning_rate": 1.990593813070548e-05, + "loss": 0.40575331449508667, + "step": 372 + }, + { + "epoch": 0.18435685160014828, + "grad_norm": 0.869456919545876, + "learning_rate": 1.99048167989348e-05, + "loss": 0.40580621361732483, + "step": 373 + }, + { + "epoch": 0.18485110589398246, + "grad_norm": 0.9514367145479501, + "learning_rate": 1.9903688854849948e-05, + "loss": 0.461843878030777, + "step": 374 + }, + { + "epoch": 0.18534536018781664, + "grad_norm": 0.9237949473924573, + "learning_rate": 1.990255429920392e-05, + "loss": 0.38992881774902344, + "step": 375 + }, + { + "epoch": 0.18583961448165082, + "grad_norm": 0.8831901142276523, + "learning_rate": 1.9901413132754133e-05, + "loss": 0.4288073480129242, + "step": 376 + }, + { + "epoch": 0.186333868775485, + "grad_norm": 0.9233387492673684, + "learning_rate": 1.9900265356262418e-05, + "loss": 0.4376278221607208, + "step": 377 + }, + { + "epoch": 0.18682812306931917, + "grad_norm": 1.0362403856880367, + "learning_rate": 1.9899110970495e-05, + "loss": 0.4127569794654846, + "step": 378 + }, + { + "epoch": 0.18732237736315335, + "grad_norm": 0.9507974239376735, + "learning_rate": 1.9897949976222543e-05, + "loss": 0.4221431016921997, + "step": 379 + }, + { + "epoch": 0.18781663165698753, + "grad_norm": 0.9433678538632697, + "learning_rate": 1.9896782374220108e-05, + "loss": 0.3540682792663574, + "step": 380 + }, + { + "epoch": 0.1883108859508217, + "grad_norm": 0.9261378158924178, + "learning_rate": 1.9895608165267165e-05, + "loss": 0.3746468424797058, + "step": 381 + }, + { + "epoch": 0.1888051402446559, + "grad_norm": 0.885989840984364, + "learning_rate": 1.9894427350147602e-05, + "loss": 0.44986462593078613, + "step": 382 + }, + { + "epoch": 0.18929939453849004, + "grad_norm": 0.990953109983041, + "learning_rate": 1.9893239929649716e-05, + "loss": 0.38902726769447327, + "step": 383 + }, + { + "epoch": 0.18979364883232422, + "grad_norm": 0.9780134618767543, + "learning_rate": 
1.9892045904566212e-05, + "loss": 0.43202030658721924, + "step": 384 + }, + { + "epoch": 0.1902879031261584, + "grad_norm": 0.9892650612917288, + "learning_rate": 1.9890845275694197e-05, + "loss": 0.3984760344028473, + "step": 385 + }, + { + "epoch": 0.19078215741999258, + "grad_norm": 0.9818585745680383, + "learning_rate": 1.9889638043835203e-05, + "loss": 0.41927874088287354, + "step": 386 + }, + { + "epoch": 0.19127641171382676, + "grad_norm": 0.8767703705433573, + "learning_rate": 1.9888424209795153e-05, + "loss": 0.3809741735458374, + "step": 387 + }, + { + "epoch": 0.19177066600766093, + "grad_norm": 0.9482820311569345, + "learning_rate": 1.988720377438439e-05, + "loss": 0.4237920045852661, + "step": 388 + }, + { + "epoch": 0.1922649203014951, + "grad_norm": 1.0327070863618417, + "learning_rate": 1.9885976738417662e-05, + "loss": 0.4065277576446533, + "step": 389 + }, + { + "epoch": 0.1927591745953293, + "grad_norm": 0.9237977569787911, + "learning_rate": 1.9884743102714116e-05, + "loss": 0.41154375672340393, + "step": 390 + }, + { + "epoch": 0.19325342888916347, + "grad_norm": 1.2326124039761357, + "learning_rate": 1.9883502868097304e-05, + "loss": 0.46544453501701355, + "step": 391 + }, + { + "epoch": 0.19374768318299765, + "grad_norm": 0.9587510645484782, + "learning_rate": 1.9882256035395204e-05, + "loss": 0.41279950737953186, + "step": 392 + }, + { + "epoch": 0.19424193747683183, + "grad_norm": 0.861022204519604, + "learning_rate": 1.988100260544017e-05, + "loss": 0.40083667635917664, + "step": 393 + }, + { + "epoch": 0.194736191770666, + "grad_norm": 0.8790820180214292, + "learning_rate": 1.9879742579068976e-05, + "loss": 0.40041595697402954, + "step": 394 + }, + { + "epoch": 0.19523044606450018, + "grad_norm": 1.0258873082657662, + "learning_rate": 1.9878475957122803e-05, + "loss": 0.45317894220352173, + "step": 395 + }, + { + "epoch": 0.19572470035833436, + "grad_norm": 0.9348755525455025, + "learning_rate": 1.987720274044723e-05, + "loss": 
0.4163329005241394, + "step": 396 + }, + { + "epoch": 0.19621895465216854, + "grad_norm": 0.9706842353465618, + "learning_rate": 1.9875922929892235e-05, + "loss": 0.4252028167247772, + "step": 397 + }, + { + "epoch": 0.19671320894600272, + "grad_norm": 0.9127590943033566, + "learning_rate": 1.9874636526312202e-05, + "loss": 0.40558624267578125, + "step": 398 + }, + { + "epoch": 0.1972074632398369, + "grad_norm": 0.9762994418484081, + "learning_rate": 1.9873343530565913e-05, + "loss": 0.4352114796638489, + "step": 399 + }, + { + "epoch": 0.19770171753367108, + "grad_norm": 0.9123271316620398, + "learning_rate": 1.9872043943516556e-05, + "loss": 0.4076879024505615, + "step": 400 + }, + { + "epoch": 0.19819597182750526, + "grad_norm": 0.9627661884342358, + "learning_rate": 1.987073776603172e-05, + "loss": 0.4406166672706604, + "step": 401 + }, + { + "epoch": 0.19869022612133944, + "grad_norm": 0.8833048421451372, + "learning_rate": 1.9869424998983386e-05, + "loss": 0.3974360227584839, + "step": 402 + }, + { + "epoch": 0.19918448041517361, + "grad_norm": 0.8808806866223299, + "learning_rate": 1.9868105643247934e-05, + "loss": 0.4297831058502197, + "step": 403 + }, + { + "epoch": 0.1996787347090078, + "grad_norm": 0.9793340004481055, + "learning_rate": 1.986677969970616e-05, + "loss": 0.4214811623096466, + "step": 404 + }, + { + "epoch": 0.20017298900284197, + "grad_norm": 0.8979387674277745, + "learning_rate": 1.9865447169243234e-05, + "loss": 0.37227538228034973, + "step": 405 + }, + { + "epoch": 0.20066724329667615, + "grad_norm": 0.9492862396661451, + "learning_rate": 1.986410805274874e-05, + "loss": 0.4367320239543915, + "step": 406 + }, + { + "epoch": 0.20116149759051033, + "grad_norm": 0.9753990450504955, + "learning_rate": 1.9862762351116646e-05, + "loss": 0.4327583909034729, + "step": 407 + }, + { + "epoch": 0.2016557518843445, + "grad_norm": 0.9742332984468446, + "learning_rate": 1.9861410065245332e-05, + "loss": 0.45309939980506897, + "step": 408 + }, + { + 
"epoch": 0.20215000617817866, + "grad_norm": 0.9433373475369933, + "learning_rate": 1.986005119603756e-05, + "loss": 0.39196106791496277, + "step": 409 + }, + { + "epoch": 0.20264426047201284, + "grad_norm": 0.9834536288459345, + "learning_rate": 1.985868574440049e-05, + "loss": 0.4037923812866211, + "step": 410 + }, + { + "epoch": 0.20313851476584702, + "grad_norm": 0.9331733674072598, + "learning_rate": 1.9857313711245684e-05, + "loss": 0.41214677691459656, + "step": 411 + }, + { + "epoch": 0.2036327690596812, + "grad_norm": 0.9676344806099859, + "learning_rate": 1.9855935097489087e-05, + "loss": 0.4265231192111969, + "step": 412 + }, + { + "epoch": 0.20412702335351537, + "grad_norm": 0.9398051984820485, + "learning_rate": 1.9854549904051046e-05, + "loss": 0.4245712161064148, + "step": 413 + }, + { + "epoch": 0.20462127764734955, + "grad_norm": 1.0688359248893853, + "learning_rate": 1.985315813185629e-05, + "loss": 0.36296984553337097, + "step": 414 + }, + { + "epoch": 0.20511553194118373, + "grad_norm": 0.8752111789079005, + "learning_rate": 1.985175978183395e-05, + "loss": 0.3982447683811188, + "step": 415 + }, + { + "epoch": 0.2056097862350179, + "grad_norm": 0.9696106773901182, + "learning_rate": 1.9850354854917543e-05, + "loss": 0.4087941646575928, + "step": 416 + }, + { + "epoch": 0.2061040405288521, + "grad_norm": 0.9068111697273192, + "learning_rate": 1.9848943352044982e-05, + "loss": 0.4147699177265167, + "step": 417 + }, + { + "epoch": 0.20659829482268627, + "grad_norm": 0.9679150237458849, + "learning_rate": 1.9847525274158562e-05, + "loss": 0.42588335275650024, + "step": 418 + }, + { + "epoch": 0.20709254911652045, + "grad_norm": 0.8455247598954041, + "learning_rate": 1.9846100622204975e-05, + "loss": 0.42607247829437256, + "step": 419 + }, + { + "epoch": 0.20758680341035463, + "grad_norm": 0.8383230576354441, + "learning_rate": 1.9844669397135292e-05, + "loss": 0.3600303530693054, + "step": 420 + }, + { + "epoch": 0.2080810577041888, + "grad_norm": 
0.9989742736396935, + "learning_rate": 1.9843231599904988e-05, + "loss": 0.47888651490211487, + "step": 421 + }, + { + "epoch": 0.20857531199802298, + "grad_norm": 0.9050077435994102, + "learning_rate": 1.9841787231473906e-05, + "loss": 0.3789903521537781, + "step": 422 + }, + { + "epoch": 0.20906956629185716, + "grad_norm": 0.9737429395044322, + "learning_rate": 1.9840336292806292e-05, + "loss": 0.3682858943939209, + "step": 423 + }, + { + "epoch": 0.20956382058569134, + "grad_norm": 0.9565489819657318, + "learning_rate": 1.9838878784870772e-05, + "loss": 0.42071375250816345, + "step": 424 + }, + { + "epoch": 0.21005807487952552, + "grad_norm": 0.8997646005118014, + "learning_rate": 1.9837414708640353e-05, + "loss": 0.4258945882320404, + "step": 425 + }, + { + "epoch": 0.2105523291733597, + "grad_norm": 0.8773247199262179, + "learning_rate": 1.9835944065092433e-05, + "loss": 0.42377644777297974, + "step": 426 + }, + { + "epoch": 0.21104658346719388, + "grad_norm": 0.8695535067011908, + "learning_rate": 1.9834466855208795e-05, + "loss": 0.35860198736190796, + "step": 427 + }, + { + "epoch": 0.21154083776102806, + "grad_norm": 0.8547283257189083, + "learning_rate": 1.9832983079975606e-05, + "loss": 0.3498537242412567, + "step": 428 + }, + { + "epoch": 0.21203509205486223, + "grad_norm": 0.9645117506541977, + "learning_rate": 1.9831492740383405e-05, + "loss": 0.3779754042625427, + "step": 429 + }, + { + "epoch": 0.2125293463486964, + "grad_norm": 0.9052431386511324, + "learning_rate": 1.9829995837427124e-05, + "loss": 0.3574570119380951, + "step": 430 + }, + { + "epoch": 0.2130236006425306, + "grad_norm": 0.9528105437455127, + "learning_rate": 1.982849237210608e-05, + "loss": 0.40678369998931885, + "step": 431 + }, + { + "epoch": 0.21351785493636477, + "grad_norm": 1.0383565017869998, + "learning_rate": 1.9826982345423955e-05, + "loss": 0.4392494261264801, + "step": 432 + }, + { + "epoch": 0.21401210923019895, + "grad_norm": 0.9595788699726988, + "learning_rate": 
1.982546575838883e-05, + "loss": 0.3858703374862671, + "step": 433 + }, + { + "epoch": 0.21450636352403313, + "grad_norm": 1.022569300933342, + "learning_rate": 1.9823942612013153e-05, + "loss": 0.4427873492240906, + "step": 434 + }, + { + "epoch": 0.21500061781786728, + "grad_norm": 1.0243841009335557, + "learning_rate": 1.9822412907313756e-05, + "loss": 0.40610629320144653, + "step": 435 + }, + { + "epoch": 0.21549487211170146, + "grad_norm": 1.0647698522638835, + "learning_rate": 1.9820876645311847e-05, + "loss": 0.4181024432182312, + "step": 436 + }, + { + "epoch": 0.21598912640553564, + "grad_norm": 0.9101041422869367, + "learning_rate": 1.981933382703301e-05, + "loss": 0.39591747522354126, + "step": 437 + }, + { + "epoch": 0.21648338069936982, + "grad_norm": 1.0250837449595331, + "learning_rate": 1.9817784453507215e-05, + "loss": 0.4326947033405304, + "step": 438 + }, + { + "epoch": 0.216977634993204, + "grad_norm": 1.0886150838818542, + "learning_rate": 1.98162285257688e-05, + "loss": 0.42645522952079773, + "step": 439 + }, + { + "epoch": 0.21747188928703817, + "grad_norm": 0.978930417047399, + "learning_rate": 1.9814666044856472e-05, + "loss": 0.37372538447380066, + "step": 440 + }, + { + "epoch": 0.21796614358087235, + "grad_norm": 1.0917263900138416, + "learning_rate": 1.9813097011813328e-05, + "loss": 0.44066423177719116, + "step": 441 + }, + { + "epoch": 0.21846039787470653, + "grad_norm": 0.9730835844652884, + "learning_rate": 1.9811521427686833e-05, + "loss": 0.39892369508743286, + "step": 442 + }, + { + "epoch": 0.2189546521685407, + "grad_norm": 1.003964491264553, + "learning_rate": 1.980993929352882e-05, + "loss": 0.43497514724731445, + "step": 443 + }, + { + "epoch": 0.2194489064623749, + "grad_norm": 0.9716014988350979, + "learning_rate": 1.9808350610395504e-05, + "loss": 0.3810148239135742, + "step": 444 + }, + { + "epoch": 0.21994316075620907, + "grad_norm": 1.0156931642150575, + "learning_rate": 1.9806755379347465e-05, + "loss": 
0.3952462673187256, + "step": 445 + }, + { + "epoch": 0.22043741505004324, + "grad_norm": 0.8774607433571091, + "learning_rate": 1.9805153601449655e-05, + "loss": 0.39168232679367065, + "step": 446 + }, + { + "epoch": 0.22093166934387742, + "grad_norm": 0.8991272209071992, + "learning_rate": 1.98035452777714e-05, + "loss": 0.38572901487350464, + "step": 447 + }, + { + "epoch": 0.2214259236377116, + "grad_norm": 0.9468757778036829, + "learning_rate": 1.980193040938639e-05, + "loss": 0.40514758229255676, + "step": 448 + }, + { + "epoch": 0.22192017793154578, + "grad_norm": 0.9858758484436677, + "learning_rate": 1.9800308997372696e-05, + "loss": 0.4289678931236267, + "step": 449 + }, + { + "epoch": 0.22241443222537996, + "grad_norm": 1.074259689420517, + "learning_rate": 1.979868104281274e-05, + "loss": 0.4082314670085907, + "step": 450 + }, + { + "epoch": 0.22290868651921414, + "grad_norm": 0.8691392363656588, + "learning_rate": 1.979704654679333e-05, + "loss": 0.3819827735424042, + "step": 451 + }, + { + "epoch": 0.22340294081304832, + "grad_norm": 0.9538480526249539, + "learning_rate": 1.979540551040563e-05, + "loss": 0.42063748836517334, + "step": 452 + }, + { + "epoch": 0.2238971951068825, + "grad_norm": 0.9510560747426838, + "learning_rate": 1.9793757934745166e-05, + "loss": 0.41634586453437805, + "step": 453 + }, + { + "epoch": 0.22439144940071667, + "grad_norm": 0.9597511417746731, + "learning_rate": 1.979210382091184e-05, + "loss": 0.4151400625705719, + "step": 454 + }, + { + "epoch": 0.22488570369455085, + "grad_norm": 0.9461794779595009, + "learning_rate": 1.9790443170009918e-05, + "loss": 0.40609729290008545, + "step": 455 + }, + { + "epoch": 0.22537995798838503, + "grad_norm": 0.9000627758052128, + "learning_rate": 1.9788775983148022e-05, + "loss": 0.38967129588127136, + "step": 456 + }, + { + "epoch": 0.2258742122822192, + "grad_norm": 0.9437292574418441, + "learning_rate": 1.978710226143915e-05, + "loss": 0.3833470940589905, + "step": 457 + }, + { + 
"epoch": 0.2263684665760534, + "grad_norm": 1.0849111028533656, + "learning_rate": 1.978542200600064e-05, + "loss": 0.42918887734413147, + "step": 458 + }, + { + "epoch": 0.22686272086988757, + "grad_norm": 0.8891911900981012, + "learning_rate": 1.978373521795422e-05, + "loss": 0.3793666660785675, + "step": 459 + }, + { + "epoch": 0.22735697516372175, + "grad_norm": 0.9329571379921634, + "learning_rate": 1.978204189842596e-05, + "loss": 0.3885256350040436, + "step": 460 + }, + { + "epoch": 0.22785122945755593, + "grad_norm": 0.9612859575938862, + "learning_rate": 1.97803420485463e-05, + "loss": 0.4003330171108246, + "step": 461 + }, + { + "epoch": 0.22834548375139008, + "grad_norm": 1.0153934251086247, + "learning_rate": 1.9778635669450026e-05, + "loss": 0.4050712585449219, + "step": 462 + }, + { + "epoch": 0.22883973804522426, + "grad_norm": 0.9955917551783842, + "learning_rate": 1.9776922762276304e-05, + "loss": 0.4003967046737671, + "step": 463 + }, + { + "epoch": 0.22933399233905843, + "grad_norm": 1.0625378898456048, + "learning_rate": 1.9775203328168643e-05, + "loss": 0.4506968855857849, + "step": 464 + }, + { + "epoch": 0.2298282466328926, + "grad_norm": 0.9586656507624374, + "learning_rate": 1.9773477368274906e-05, + "loss": 0.3947281241416931, + "step": 465 + }, + { + "epoch": 0.2303225009267268, + "grad_norm": 1.0193199601021392, + "learning_rate": 1.9771744883747326e-05, + "loss": 0.4166758954524994, + "step": 466 + }, + { + "epoch": 0.23081675522056097, + "grad_norm": 0.9824293606770813, + "learning_rate": 1.9770005875742484e-05, + "loss": 0.40400344133377075, + "step": 467 + }, + { + "epoch": 0.23131100951439515, + "grad_norm": 0.9404029827561814, + "learning_rate": 1.9768260345421312e-05, + "loss": 0.4143296480178833, + "step": 468 + }, + { + "epoch": 0.23180526380822933, + "grad_norm": 1.0496759638208417, + "learning_rate": 1.976650829394911e-05, + "loss": 0.39128193259239197, + "step": 469 + }, + { + "epoch": 0.2322995181020635, + "grad_norm": 
1.033325283396431, + "learning_rate": 1.9764749722495514e-05, + "loss": 0.4305758476257324, + "step": 470 + }, + { + "epoch": 0.23279377239589769, + "grad_norm": 0.9791981730439014, + "learning_rate": 1.9762984632234523e-05, + "loss": 0.41711747646331787, + "step": 471 + }, + { + "epoch": 0.23328802668973186, + "grad_norm": 0.9590482451910926, + "learning_rate": 1.976121302434449e-05, + "loss": 0.43328845500946045, + "step": 472 + }, + { + "epoch": 0.23378228098356604, + "grad_norm": 0.9134750069589276, + "learning_rate": 1.975943490000811e-05, + "loss": 0.38707420229911804, + "step": 473 + }, + { + "epoch": 0.23427653527740022, + "grad_norm": 0.9896782154106246, + "learning_rate": 1.9757650260412438e-05, + "loss": 0.390054851770401, + "step": 474 + }, + { + "epoch": 0.2347707895712344, + "grad_norm": 1.0430972668852745, + "learning_rate": 1.9755859106748875e-05, + "loss": 0.45697346329689026, + "step": 475 + }, + { + "epoch": 0.23526504386506858, + "grad_norm": 0.950214634248398, + "learning_rate": 1.9754061440213165e-05, + "loss": 0.4381307363510132, + "step": 476 + }, + { + "epoch": 0.23575929815890276, + "grad_norm": 0.9612066818802636, + "learning_rate": 1.9752257262005403e-05, + "loss": 0.4217841625213623, + "step": 477 + }, + { + "epoch": 0.23625355245273694, + "grad_norm": 0.8699003234814695, + "learning_rate": 1.9750446573330038e-05, + "loss": 0.35968005657196045, + "step": 478 + }, + { + "epoch": 0.23674780674657112, + "grad_norm": 0.8353290173002438, + "learning_rate": 1.9748629375395856e-05, + "loss": 0.3516439199447632, + "step": 479 + }, + { + "epoch": 0.2372420610404053, + "grad_norm": 0.9683111499165196, + "learning_rate": 1.9746805669415995e-05, + "loss": 0.4078671634197235, + "step": 480 + }, + { + "epoch": 0.23773631533423947, + "grad_norm": 0.967434671965903, + "learning_rate": 1.9744975456607936e-05, + "loss": 0.39654213190078735, + "step": 481 + }, + { + "epoch": 0.23823056962807365, + "grad_norm": 0.9446129798331165, + "learning_rate": 
1.9743138738193498e-05, + "loss": 0.41271698474884033, + "step": 482 + }, + { + "epoch": 0.23872482392190783, + "grad_norm": 0.9563785743614732, + "learning_rate": 1.974129551539885e-05, + "loss": 0.3957251310348511, + "step": 483 + }, + { + "epoch": 0.239219078215742, + "grad_norm": 1.0318067283466978, + "learning_rate": 1.9739445789454506e-05, + "loss": 0.39857393503189087, + "step": 484 + }, + { + "epoch": 0.2397133325095762, + "grad_norm": 0.9625937520590958, + "learning_rate": 1.973758956159531e-05, + "loss": 0.4263526499271393, + "step": 485 + }, + { + "epoch": 0.24020758680341037, + "grad_norm": 0.9782583924092142, + "learning_rate": 1.9735726833060457e-05, + "loss": 0.3849489688873291, + "step": 486 + }, + { + "epoch": 0.24070184109724455, + "grad_norm": 0.9932149128826128, + "learning_rate": 1.9733857605093476e-05, + "loss": 0.431019127368927, + "step": 487 + }, + { + "epoch": 0.2411960953910787, + "grad_norm": 0.9703866882534654, + "learning_rate": 1.973198187894224e-05, + "loss": 0.3740619421005249, + "step": 488 + }, + { + "epoch": 0.24169034968491288, + "grad_norm": 0.9420951155788563, + "learning_rate": 1.9730099655858953e-05, + "loss": 0.361680269241333, + "step": 489 + }, + { + "epoch": 0.24218460397874705, + "grad_norm": 1.0045147685747362, + "learning_rate": 1.9728210937100162e-05, + "loss": 0.41683071851730347, + "step": 490 + }, + { + "epoch": 0.24267885827258123, + "grad_norm": 1.0255058564946795, + "learning_rate": 1.9726315723926746e-05, + "loss": 0.3898739516735077, + "step": 491 + }, + { + "epoch": 0.2431731125664154, + "grad_norm": 0.992746780987763, + "learning_rate": 1.9724414017603925e-05, + "loss": 0.39339032769203186, + "step": 492 + }, + { + "epoch": 0.2436673668602496, + "grad_norm": 0.9018262406248393, + "learning_rate": 1.9722505819401255e-05, + "loss": 0.401676744222641, + "step": 493 + }, + { + "epoch": 0.24416162115408377, + "grad_norm": 0.956392375337736, + "learning_rate": 1.9720591130592613e-05, + "loss": 0.3814789056777954, 
+ "step": 494 + }, + { + "epoch": 0.24465587544791795, + "grad_norm": 1.0339059816881517, + "learning_rate": 1.9718669952456226e-05, + "loss": 0.3980346918106079, + "step": 495 + }, + { + "epoch": 0.24515012974175213, + "grad_norm": 1.0852693818985448, + "learning_rate": 1.971674228627464e-05, + "loss": 0.4222795069217682, + "step": 496 + }, + { + "epoch": 0.2456443840355863, + "grad_norm": 0.9629746856387489, + "learning_rate": 1.971480813333474e-05, + "loss": 0.3795197904109955, + "step": 497 + }, + { + "epoch": 0.24613863832942048, + "grad_norm": 1.0428831707745134, + "learning_rate": 1.971286749492774e-05, + "loss": 0.3746161460876465, + "step": 498 + }, + { + "epoch": 0.24663289262325466, + "grad_norm": 1.0211942338953277, + "learning_rate": 1.9710920372349174e-05, + "loss": 0.3552350699901581, + "step": 499 + }, + { + "epoch": 0.24712714691708884, + "grad_norm": 0.913724645727759, + "learning_rate": 1.9708966766898925e-05, + "loss": 0.39690741896629333, + "step": 500 + }, + { + "epoch": 0.24762140121092302, + "grad_norm": 1.0179277636972188, + "learning_rate": 1.9707006679881186e-05, + "loss": 0.39530014991760254, + "step": 501 + }, + { + "epoch": 0.2481156555047572, + "grad_norm": 1.0722850381631455, + "learning_rate": 1.9705040112604483e-05, + "loss": 0.41228705644607544, + "step": 502 + }, + { + "epoch": 0.24860990979859138, + "grad_norm": 0.9774177098582278, + "learning_rate": 1.9703067066381668e-05, + "loss": 0.4330476224422455, + "step": 503 + }, + { + "epoch": 0.24910416409242556, + "grad_norm": 0.9849824106564479, + "learning_rate": 1.970108754252992e-05, + "loss": 0.38365668058395386, + "step": 504 + }, + { + "epoch": 0.24959841838625973, + "grad_norm": 1.0789440281177851, + "learning_rate": 1.969910154237074e-05, + "loss": 0.4419581890106201, + "step": 505 + }, + { + "epoch": 0.2500926726800939, + "grad_norm": 1.0828116066497757, + "learning_rate": 1.9697109067229957e-05, + "loss": 0.38741230964660645, + "step": 506 + }, + { + "epoch": 
0.2505869269739281, + "grad_norm": 0.9914523280251673, + "learning_rate": 1.969511011843771e-05, + "loss": 0.41751983761787415, + "step": 507 + }, + { + "epoch": 0.25108118126776224, + "grad_norm": 0.9718169799013945, + "learning_rate": 1.9693104697328477e-05, + "loss": 0.40355241298675537, + "step": 508 + }, + { + "epoch": 0.25157543556159645, + "grad_norm": 1.003225231520968, + "learning_rate": 1.9691092805241046e-05, + "loss": 0.3511045575141907, + "step": 509 + }, + { + "epoch": 0.2520696898554306, + "grad_norm": 1.1208960250871327, + "learning_rate": 1.9689074443518526e-05, + "loss": 0.38917112350463867, + "step": 510 + }, + { + "epoch": 0.2525639441492648, + "grad_norm": 0.9640213098912707, + "learning_rate": 1.968704961350835e-05, + "loss": 0.40256473422050476, + "step": 511 + }, + { + "epoch": 0.25305819844309896, + "grad_norm": 0.8857886708710384, + "learning_rate": 1.968501831656226e-05, + "loss": 0.32350897789001465, + "step": 512 + }, + { + "epoch": 0.25355245273693316, + "grad_norm": 1.0209548318094466, + "learning_rate": 1.9682980554036322e-05, + "loss": 0.36787012219429016, + "step": 513 + }, + { + "epoch": 0.2540467070307673, + "grad_norm": 1.063374274844625, + "learning_rate": 1.9680936327290924e-05, + "loss": 0.4035605490207672, + "step": 514 + }, + { + "epoch": 0.2545409613246015, + "grad_norm": 0.9437423188361623, + "learning_rate": 1.9678885637690755e-05, + "loss": 0.39402660727500916, + "step": 515 + }, + { + "epoch": 0.2550352156184357, + "grad_norm": 1.1793476229973228, + "learning_rate": 1.967682848660483e-05, + "loss": 0.37553271651268005, + "step": 516 + }, + { + "epoch": 0.2555294699122699, + "grad_norm": 1.047789732428987, + "learning_rate": 1.9674764875406472e-05, + "loss": 0.40148675441741943, + "step": 517 + }, + { + "epoch": 0.25602372420610403, + "grad_norm": 1.1994265366678782, + "learning_rate": 1.967269480547332e-05, + "loss": 0.45255252718925476, + "step": 518 + }, + { + "epoch": 0.25651797849993824, + "grad_norm": 
1.0116666478277523, + "learning_rate": 1.9670618278187318e-05, + "loss": 0.4183574616909027, + "step": 519 + }, + { + "epoch": 0.2570122327937724, + "grad_norm": 0.9518606397664687, + "learning_rate": 1.9668535294934733e-05, + "loss": 0.3950796127319336, + "step": 520 + }, + { + "epoch": 0.2575064870876066, + "grad_norm": 0.9729673190351172, + "learning_rate": 1.9666445857106132e-05, + "loss": 0.4062424898147583, + "step": 521 + }, + { + "epoch": 0.25800074138144075, + "grad_norm": 0.9474577180562711, + "learning_rate": 1.966434996609639e-05, + "loss": 0.4095906913280487, + "step": 522 + }, + { + "epoch": 0.25849499567527495, + "grad_norm": 1.1739974412660419, + "learning_rate": 1.96622476233047e-05, + "loss": 0.42302393913269043, + "step": 523 + }, + { + "epoch": 0.2589892499691091, + "grad_norm": 1.0746371790844444, + "learning_rate": 1.966013883013455e-05, + "loss": 0.43204039335250854, + "step": 524 + }, + { + "epoch": 0.2594835042629433, + "grad_norm": 0.9744852361980706, + "learning_rate": 1.9658023587993748e-05, + "loss": 0.39941906929016113, + "step": 525 + }, + { + "epoch": 0.25997775855677746, + "grad_norm": 0.9322675006976836, + "learning_rate": 1.9655901898294397e-05, + "loss": 0.37053728103637695, + "step": 526 + }, + { + "epoch": 0.26047201285061167, + "grad_norm": 0.9500036404091089, + "learning_rate": 1.96537737624529e-05, + "loss": 0.4126317501068115, + "step": 527 + }, + { + "epoch": 0.2609662671444458, + "grad_norm": 0.9592560956850021, + "learning_rate": 1.9651639181889975e-05, + "loss": 0.42397794127464294, + "step": 528 + }, + { + "epoch": 0.26146052143827997, + "grad_norm": 1.09730750123291, + "learning_rate": 1.964949815803064e-05, + "loss": 0.3606872260570526, + "step": 529 + }, + { + "epoch": 0.2619547757321142, + "grad_norm": 1.0256203362936218, + "learning_rate": 1.9647350692304206e-05, + "loss": 0.420923113822937, + "step": 530 + }, + { + "epoch": 0.2624490300259483, + "grad_norm": 1.0242401280009386, + "learning_rate": 
1.9645196786144298e-05, + "loss": 0.41700440645217896, + "step": 531 + }, + { + "epoch": 0.26294328431978253, + "grad_norm": 0.9861507549209962, + "learning_rate": 1.9643036440988825e-05, + "loss": 0.3961814045906067, + "step": 532 + }, + { + "epoch": 0.2634375386136167, + "grad_norm": 0.9400998714081333, + "learning_rate": 1.9640869658280005e-05, + "loss": 0.4025250971317291, + "step": 533 + }, + { + "epoch": 0.2639317929074509, + "grad_norm": 1.0201682019086518, + "learning_rate": 1.9638696439464357e-05, + "loss": 0.38828611373901367, + "step": 534 + }, + { + "epoch": 0.26442604720128504, + "grad_norm": 0.8944214314341241, + "learning_rate": 1.963651678599268e-05, + "loss": 0.3109109401702881, + "step": 535 + }, + { + "epoch": 0.26492030149511925, + "grad_norm": 1.0758326810562073, + "learning_rate": 1.963433069932009e-05, + "loss": 0.41516438126564026, + "step": 536 + }, + { + "epoch": 0.2654145557889534, + "grad_norm": 0.972035022615468, + "learning_rate": 1.9632138180905982e-05, + "loss": 0.3765295743942261, + "step": 537 + }, + { + "epoch": 0.2659088100827876, + "grad_norm": 1.0590611315407708, + "learning_rate": 1.9629939232214052e-05, + "loss": 0.37631309032440186, + "step": 538 + }, + { + "epoch": 0.26640306437662176, + "grad_norm": 0.9543257606304313, + "learning_rate": 1.9627733854712286e-05, + "loss": 0.3640018403530121, + "step": 539 + }, + { + "epoch": 0.26689731867045596, + "grad_norm": 1.0213174253270256, + "learning_rate": 1.9625522049872962e-05, + "loss": 0.3971521854400635, + "step": 540 + }, + { + "epoch": 0.2673915729642901, + "grad_norm": 1.0059131210770185, + "learning_rate": 1.962330381917265e-05, + "loss": 0.4218612313270569, + "step": 541 + }, + { + "epoch": 0.2678858272581243, + "grad_norm": 1.0124871124462342, + "learning_rate": 1.9621079164092203e-05, + "loss": 0.38814622163772583, + "step": 542 + }, + { + "epoch": 0.26838008155195847, + "grad_norm": 1.0310689772428585, + "learning_rate": 1.961884808611678e-05, + "loss": 
0.3912709355354309, + "step": 543 + }, + { + "epoch": 0.2688743358457927, + "grad_norm": 0.9919097213748044, + "learning_rate": 1.9616610586735808e-05, + "loss": 0.4007106423377991, + "step": 544 + }, + { + "epoch": 0.26936859013962683, + "grad_norm": 0.9871985402956727, + "learning_rate": 1.9614366667443016e-05, + "loss": 0.37406057119369507, + "step": 545 + }, + { + "epoch": 0.26986284443346104, + "grad_norm": 0.970768236440829, + "learning_rate": 1.961211632973641e-05, + "loss": 0.4187811613082886, + "step": 546 + }, + { + "epoch": 0.2703570987272952, + "grad_norm": 1.049304525520643, + "learning_rate": 1.960985957511828e-05, + "loss": 0.44418057799339294, + "step": 547 + }, + { + "epoch": 0.2708513530211294, + "grad_norm": 1.0048719478421346, + "learning_rate": 1.9607596405095205e-05, + "loss": 0.41016438603401184, + "step": 548 + }, + { + "epoch": 0.27134560731496354, + "grad_norm": 1.2563417457062223, + "learning_rate": 1.9605326821178047e-05, + "loss": 0.39461439847946167, + "step": 549 + }, + { + "epoch": 0.27183986160879775, + "grad_norm": 0.9443238609304102, + "learning_rate": 1.960305082488195e-05, + "loss": 0.4159786105155945, + "step": 550 + }, + { + "epoch": 0.2723341159026319, + "grad_norm": 0.9387957037755528, + "learning_rate": 1.960076841772633e-05, + "loss": 0.3702941834926605, + "step": 551 + }, + { + "epoch": 0.2728283701964661, + "grad_norm": 1.0745575617770338, + "learning_rate": 1.9598479601234894e-05, + "loss": 0.3482900559902191, + "step": 552 + }, + { + "epoch": 0.27332262449030026, + "grad_norm": 1.1412061517783256, + "learning_rate": 1.9596184376935618e-05, + "loss": 0.40550655126571655, + "step": 553 + }, + { + "epoch": 0.2738168787841344, + "grad_norm": 0.9446073244587436, + "learning_rate": 1.9593882746360767e-05, + "loss": 0.38604867458343506, + "step": 554 + }, + { + "epoch": 0.2743111330779686, + "grad_norm": 0.9388567147005249, + "learning_rate": 1.9591574711046876e-05, + "loss": 0.36586758494377136, + "step": 555 + }, + { + 
"epoch": 0.27480538737180277, + "grad_norm": 0.9730414125092071, + "learning_rate": 1.958926027253475e-05, + "loss": 0.37780559062957764, + "step": 556 + }, + { + "epoch": 0.275299641665637, + "grad_norm": 0.9401659835761762, + "learning_rate": 1.9586939432369486e-05, + "loss": 0.3837544322013855, + "step": 557 + }, + { + "epoch": 0.2757938959594711, + "grad_norm": 1.038905164013387, + "learning_rate": 1.9584612192100433e-05, + "loss": 0.39425861835479736, + "step": 558 + }, + { + "epoch": 0.27628815025330533, + "grad_norm": 1.0791545750316935, + "learning_rate": 1.958227855328123e-05, + "loss": 0.4008832275867462, + "step": 559 + }, + { + "epoch": 0.2767824045471395, + "grad_norm": 1.0509839705522974, + "learning_rate": 1.957993851746978e-05, + "loss": 0.42411595582962036, + "step": 560 + }, + { + "epoch": 0.2772766588409737, + "grad_norm": 1.1626138880546706, + "learning_rate": 1.9577592086228257e-05, + "loss": 0.4028055965900421, + "step": 561 + }, + { + "epoch": 0.27777091313480784, + "grad_norm": 0.9383996498843509, + "learning_rate": 1.9575239261123102e-05, + "loss": 0.3785157799720764, + "step": 562 + }, + { + "epoch": 0.27826516742864205, + "grad_norm": 0.9289370196839293, + "learning_rate": 1.9572880043725032e-05, + "loss": 0.3726264536380768, + "step": 563 + }, + { + "epoch": 0.2787594217224762, + "grad_norm": 0.9959287145902769, + "learning_rate": 1.957051443560902e-05, + "loss": 0.37261486053466797, + "step": 564 + }, + { + "epoch": 0.2792536760163104, + "grad_norm": 0.9394373844868922, + "learning_rate": 1.956814243835432e-05, + "loss": 0.34781068563461304, + "step": 565 + }, + { + "epoch": 0.27974793031014455, + "grad_norm": 0.9899407389551799, + "learning_rate": 1.956576405354444e-05, + "loss": 0.3828197121620178, + "step": 566 + }, + { + "epoch": 0.28024218460397876, + "grad_norm": 0.9387592741594649, + "learning_rate": 1.9563379282767156e-05, + "loss": 0.3839726150035858, + "step": 567 + }, + { + "epoch": 0.2807364388978129, + "grad_norm": 
1.053498529947078, + "learning_rate": 1.9560988127614507e-05, + "loss": 0.3658025562763214, + "step": 568 + }, + { + "epoch": 0.2812306931916471, + "grad_norm": 1.064206434015044, + "learning_rate": 1.9558590589682795e-05, + "loss": 0.400045782327652, + "step": 569 + }, + { + "epoch": 0.28172494748548127, + "grad_norm": 0.9470530474737298, + "learning_rate": 1.955618667057258e-05, + "loss": 0.36586880683898926, + "step": 570 + }, + { + "epoch": 0.2822192017793155, + "grad_norm": 1.0137760854012388, + "learning_rate": 1.9553776371888684e-05, + "loss": 0.3886389136314392, + "step": 571 + }, + { + "epoch": 0.2827134560731496, + "grad_norm": 1.0159520278130145, + "learning_rate": 1.955135969524019e-05, + "loss": 0.37858110666275024, + "step": 572 + }, + { + "epoch": 0.28320771036698383, + "grad_norm": 0.939134880585939, + "learning_rate": 1.9548936642240435e-05, + "loss": 0.3264877498149872, + "step": 573 + }, + { + "epoch": 0.283701964660818, + "grad_norm": 1.1465399296789363, + "learning_rate": 1.9546507214507017e-05, + "loss": 0.3756924569606781, + "step": 574 + }, + { + "epoch": 0.2841962189546522, + "grad_norm": 1.0922050133590595, + "learning_rate": 1.9544071413661783e-05, + "loss": 0.3773806691169739, + "step": 575 + }, + { + "epoch": 0.28469047324848634, + "grad_norm": 1.0432958526312845, + "learning_rate": 1.9541629241330842e-05, + "loss": 0.37437382340431213, + "step": 576 + }, + { + "epoch": 0.28518472754232055, + "grad_norm": 0.9730241652440514, + "learning_rate": 1.9539180699144552e-05, + "loss": 0.3835929036140442, + "step": 577 + }, + { + "epoch": 0.2856789818361547, + "grad_norm": 1.2039096391780213, + "learning_rate": 1.9536725788737528e-05, + "loss": 0.39163681864738464, + "step": 578 + }, + { + "epoch": 0.2861732361299889, + "grad_norm": 1.1007303408462066, + "learning_rate": 1.953426451174863e-05, + "loss": 0.39241698384284973, + "step": 579 + }, + { + "epoch": 0.28666749042382306, + "grad_norm": 0.9748115984741068, + "learning_rate": 
1.953179686982097e-05, + "loss": 0.32731348276138306, + "step": 580 + }, + { + "epoch": 0.2871617447176572, + "grad_norm": 0.9649406632940735, + "learning_rate": 1.9529322864601915e-05, + "loss": 0.34735041856765747, + "step": 581 + }, + { + "epoch": 0.2876559990114914, + "grad_norm": 1.0831552948058796, + "learning_rate": 1.952684249774307e-05, + "loss": 0.3795308470726013, + "step": 582 + }, + { + "epoch": 0.28815025330532557, + "grad_norm": 1.0599543241474398, + "learning_rate": 1.95243557709003e-05, + "loss": 0.3546086549758911, + "step": 583 + }, + { + "epoch": 0.28864450759915977, + "grad_norm": 0.9634030800835625, + "learning_rate": 1.9521862685733703e-05, + "loss": 0.35397839546203613, + "step": 584 + }, + { + "epoch": 0.2891387618929939, + "grad_norm": 0.972134968680729, + "learning_rate": 1.9519363243907627e-05, + "loss": 0.350521981716156, + "step": 585 + }, + { + "epoch": 0.28963301618682813, + "grad_norm": 1.0201322204570258, + "learning_rate": 1.9516857447090663e-05, + "loss": 0.380625456571579, + "step": 586 + }, + { + "epoch": 0.2901272704806623, + "grad_norm": 0.9847688200101109, + "learning_rate": 1.9514345296955647e-05, + "loss": 0.40378236770629883, + "step": 587 + }, + { + "epoch": 0.2906215247744965, + "grad_norm": 1.0122113576142937, + "learning_rate": 1.9511826795179653e-05, + "loss": 0.4050450325012207, + "step": 588 + }, + { + "epoch": 0.29111577906833064, + "grad_norm": 1.048628562831542, + "learning_rate": 1.9509301943444e-05, + "loss": 0.3772329092025757, + "step": 589 + }, + { + "epoch": 0.29161003336216484, + "grad_norm": 1.0803687765146506, + "learning_rate": 1.9506770743434244e-05, + "loss": 0.4079870581626892, + "step": 590 + }, + { + "epoch": 0.292104287655999, + "grad_norm": 1.0069688403525805, + "learning_rate": 1.950423319684017e-05, + "loss": 0.4233503043651581, + "step": 591 + }, + { + "epoch": 0.2925985419498332, + "grad_norm": 1.0403594154189246, + "learning_rate": 1.9501689305355814e-05, + "loss": 0.395530104637146, + 
"step": 592 + }, + { + "epoch": 0.29309279624366735, + "grad_norm": 1.0468686113369423, + "learning_rate": 1.949913907067944e-05, + "loss": 0.4266175925731659, + "step": 593 + }, + { + "epoch": 0.29358705053750156, + "grad_norm": 1.0371386643985676, + "learning_rate": 1.949658249451355e-05, + "loss": 0.4428660571575165, + "step": 594 + }, + { + "epoch": 0.2940813048313357, + "grad_norm": 0.928511699803538, + "learning_rate": 1.9494019578564874e-05, + "loss": 0.36831945180892944, + "step": 595 + }, + { + "epoch": 0.2945755591251699, + "grad_norm": 1.059362576098806, + "learning_rate": 1.949145032454438e-05, + "loss": 0.392259806394577, + "step": 596 + }, + { + "epoch": 0.29506981341900407, + "grad_norm": 0.9638882642169329, + "learning_rate": 1.948887473416727e-05, + "loss": 0.43743032217025757, + "step": 597 + }, + { + "epoch": 0.2955640677128383, + "grad_norm": 0.9566828851720006, + "learning_rate": 1.9486292809152965e-05, + "loss": 0.3725258409976959, + "step": 598 + }, + { + "epoch": 0.2960583220066724, + "grad_norm": 0.9479087116485218, + "learning_rate": 1.948370455122512e-05, + "loss": 0.39507436752319336, + "step": 599 + }, + { + "epoch": 0.29655257630050663, + "grad_norm": 1.0509892705512045, + "learning_rate": 1.9481109962111623e-05, + "loss": 0.40915870666503906, + "step": 600 + }, + { + "epoch": 0.2970468305943408, + "grad_norm": 0.9528830591600533, + "learning_rate": 1.947850904354459e-05, + "loss": 0.3465006351470947, + "step": 601 + }, + { + "epoch": 0.297541084888175, + "grad_norm": 1.0989483899383072, + "learning_rate": 1.9475901797260346e-05, + "loss": 0.4205567538738251, + "step": 602 + }, + { + "epoch": 0.29803533918200914, + "grad_norm": 0.9986003576186586, + "learning_rate": 1.9473288224999455e-05, + "loss": 0.37682560086250305, + "step": 603 + }, + { + "epoch": 0.29852959347584335, + "grad_norm": 1.0393693996744362, + "learning_rate": 1.9470668328506705e-05, + "loss": 0.3865458369255066, + "step": 604 + }, + { + "epoch": 0.2990238477696775, + 
"grad_norm": 0.9426218637426483, + "learning_rate": 1.9468042109531096e-05, + "loss": 0.36366063356399536, + "step": 605 + }, + { + "epoch": 0.2995181020635117, + "grad_norm": 0.9801320950707162, + "learning_rate": 1.9465409569825857e-05, + "loss": 0.3861471116542816, + "step": 606 + }, + { + "epoch": 0.30001235635734586, + "grad_norm": 1.0257103381374684, + "learning_rate": 1.9462770711148433e-05, + "loss": 0.3499199151992798, + "step": 607 + }, + { + "epoch": 0.30050661065118, + "grad_norm": 1.1030346241860873, + "learning_rate": 1.946012553526049e-05, + "loss": 0.3704417943954468, + "step": 608 + }, + { + "epoch": 0.3010008649450142, + "grad_norm": 1.0751948386377395, + "learning_rate": 1.9457474043927908e-05, + "loss": 0.41278937458992004, + "step": 609 + }, + { + "epoch": 0.30149511923884836, + "grad_norm": 1.0379271128545955, + "learning_rate": 1.9454816238920787e-05, + "loss": 0.36078256368637085, + "step": 610 + }, + { + "epoch": 0.30198937353268257, + "grad_norm": 1.05890389444684, + "learning_rate": 1.9452152122013434e-05, + "loss": 0.3713051676750183, + "step": 611 + }, + { + "epoch": 0.3024836278265167, + "grad_norm": 1.0547983951495754, + "learning_rate": 1.9449481694984382e-05, + "loss": 0.3919684886932373, + "step": 612 + }, + { + "epoch": 0.3029778821203509, + "grad_norm": 1.1211767888578545, + "learning_rate": 1.9446804959616364e-05, + "loss": 0.4249044358730316, + "step": 613 + }, + { + "epoch": 0.3034721364141851, + "grad_norm": 1.0386798112962086, + "learning_rate": 1.9444121917696335e-05, + "loss": 0.4033172130584717, + "step": 614 + }, + { + "epoch": 0.3039663907080193, + "grad_norm": 1.020453301484689, + "learning_rate": 1.9441432571015455e-05, + "loss": 0.35740789771080017, + "step": 615 + }, + { + "epoch": 0.30446064500185344, + "grad_norm": 1.0567402195641693, + "learning_rate": 1.9438736921369093e-05, + "loss": 0.41219189763069153, + "step": 616 + }, + { + "epoch": 0.30495489929568764, + "grad_norm": 1.0356137182677312, + "learning_rate": 
1.9436034970556824e-05, + "loss": 0.3751283884048462, + "step": 617 + }, + { + "epoch": 0.3054491535895218, + "grad_norm": 1.0460808776118622, + "learning_rate": 1.9433326720382433e-05, + "loss": 0.40294593572616577, + "step": 618 + }, + { + "epoch": 0.305943407883356, + "grad_norm": 1.0087358245362568, + "learning_rate": 1.943061217265391e-05, + "loss": 0.4163772463798523, + "step": 619 + }, + { + "epoch": 0.30643766217719015, + "grad_norm": 1.007467123707354, + "learning_rate": 1.9427891329183444e-05, + "loss": 0.3796529769897461, + "step": 620 + }, + { + "epoch": 0.30693191647102436, + "grad_norm": 1.0905533067383615, + "learning_rate": 1.942516419178744e-05, + "loss": 0.44097092747688293, + "step": 621 + }, + { + "epoch": 0.3074261707648585, + "grad_norm": 0.9615172689674734, + "learning_rate": 1.942243076228649e-05, + "loss": 0.384232759475708, + "step": 622 + }, + { + "epoch": 0.3079204250586927, + "grad_norm": 0.9038435200954008, + "learning_rate": 1.941969104250539e-05, + "loss": 0.3734084367752075, + "step": 623 + }, + { + "epoch": 0.30841467935252687, + "grad_norm": 0.9414597847653995, + "learning_rate": 1.9416945034273142e-05, + "loss": 0.3532239496707916, + "step": 624 + }, + { + "epoch": 0.3089089336463611, + "grad_norm": 1.0668895366566058, + "learning_rate": 1.941419273942294e-05, + "loss": 0.39430537819862366, + "step": 625 + }, + { + "epoch": 0.3094031879401952, + "grad_norm": 1.0091341034087684, + "learning_rate": 1.941143415979218e-05, + "loss": 0.35790857672691345, + "step": 626 + }, + { + "epoch": 0.30989744223402943, + "grad_norm": 1.0381854826035726, + "learning_rate": 1.9408669297222446e-05, + "loss": 0.3684060871601105, + "step": 627 + }, + { + "epoch": 0.3103916965278636, + "grad_norm": 0.9553898295016832, + "learning_rate": 1.9405898153559522e-05, + "loss": 0.3425355553627014, + "step": 628 + }, + { + "epoch": 0.3108859508216978, + "grad_norm": 0.9032294986887355, + "learning_rate": 1.9403120730653387e-05, + "loss": 0.3295109272003174, + 
"step": 629 + }, + { + "epoch": 0.31138020511553194, + "grad_norm": 1.0576168899253493, + "learning_rate": 1.940033703035821e-05, + "loss": 0.37015989422798157, + "step": 630 + }, + { + "epoch": 0.31187445940936614, + "grad_norm": 1.1361288169710941, + "learning_rate": 1.939754705453234e-05, + "loss": 0.40625980496406555, + "step": 631 + }, + { + "epoch": 0.3123687137032003, + "grad_norm": 1.3354529260238757, + "learning_rate": 1.939475080503833e-05, + "loss": 0.42503830790519714, + "step": 632 + }, + { + "epoch": 0.31286296799703445, + "grad_norm": 1.0863606838535078, + "learning_rate": 1.939194828374292e-05, + "loss": 0.36230289936065674, + "step": 633 + }, + { + "epoch": 0.31335722229086865, + "grad_norm": 0.9800314584790245, + "learning_rate": 1.938913949251703e-05, + "loss": 0.4128720164299011, + "step": 634 + }, + { + "epoch": 0.3138514765847028, + "grad_norm": 1.1018828002960295, + "learning_rate": 1.938632443323577e-05, + "loss": 0.39706575870513916, + "step": 635 + }, + { + "epoch": 0.314345730878537, + "grad_norm": 1.0451325322820368, + "learning_rate": 1.9383503107778434e-05, + "loss": 0.38395214080810547, + "step": 636 + }, + { + "epoch": 0.31483998517237116, + "grad_norm": 0.9669746428685202, + "learning_rate": 1.9380675518028495e-05, + "loss": 0.3629944324493408, + "step": 637 + }, + { + "epoch": 0.31533423946620537, + "grad_norm": 1.0589959103814197, + "learning_rate": 1.937784166587361e-05, + "loss": 0.39474761486053467, + "step": 638 + }, + { + "epoch": 0.3158284937600395, + "grad_norm": 1.085403264447479, + "learning_rate": 1.9375001553205627e-05, + "loss": 0.423098087310791, + "step": 639 + }, + { + "epoch": 0.3163227480538737, + "grad_norm": 0.9239589256190138, + "learning_rate": 1.937215518192056e-05, + "loss": 0.3453904986381531, + "step": 640 + }, + { + "epoch": 0.3168170023477079, + "grad_norm": 0.9432054956835023, + "learning_rate": 1.9369302553918605e-05, + "loss": 0.3659127354621887, + "step": 641 + }, + { + "epoch": 0.3173112566415421, + 
"grad_norm": 1.060860081964917, + "learning_rate": 1.9366443671104132e-05, + "loss": 0.3613426089286804, + "step": 642 + }, + { + "epoch": 0.31780551093537623, + "grad_norm": 0.9515218135636598, + "learning_rate": 1.93635785353857e-05, + "loss": 0.3556531071662903, + "step": 643 + }, + { + "epoch": 0.31829976522921044, + "grad_norm": 0.9893630091198329, + "learning_rate": 1.9360707148676022e-05, + "loss": 0.3515596091747284, + "step": 644 + }, + { + "epoch": 0.3187940195230446, + "grad_norm": 0.9802147109168395, + "learning_rate": 1.9357829512892e-05, + "loss": 0.36270469427108765, + "step": 645 + }, + { + "epoch": 0.3192882738168788, + "grad_norm": 0.9936651325349853, + "learning_rate": 1.9354945629954706e-05, + "loss": 0.3617076277732849, + "step": 646 + }, + { + "epoch": 0.31978252811071295, + "grad_norm": 1.0835943099678094, + "learning_rate": 1.9352055501789376e-05, + "loss": 0.3888331949710846, + "step": 647 + }, + { + "epoch": 0.32027678240454716, + "grad_norm": 1.0454884563674065, + "learning_rate": 1.9349159130325413e-05, + "loss": 0.41199982166290283, + "step": 648 + }, + { + "epoch": 0.3207710366983813, + "grad_norm": 1.0758693507529822, + "learning_rate": 1.93462565174964e-05, + "loss": 0.3878370225429535, + "step": 649 + }, + { + "epoch": 0.3212652909922155, + "grad_norm": 1.0303850194409756, + "learning_rate": 1.9343347665240077e-05, + "loss": 0.380184531211853, + "step": 650 + }, + { + "epoch": 0.32175954528604966, + "grad_norm": 1.143999159363527, + "learning_rate": 1.9340432575498355e-05, + "loss": 0.3746795356273651, + "step": 651 + }, + { + "epoch": 0.32225379957988387, + "grad_norm": 1.0188863097829193, + "learning_rate": 1.93375112502173e-05, + "loss": 0.3700905442237854, + "step": 652 + }, + { + "epoch": 0.322748053873718, + "grad_norm": 0.9032826115280742, + "learning_rate": 1.9334583691347153e-05, + "loss": 0.3331850469112396, + "step": 653 + }, + { + "epoch": 0.32324230816755223, + "grad_norm": 0.949854268007892, + "learning_rate": 
1.933164990084231e-05, + "loss": 0.3397464156150818, + "step": 654 + }, + { + "epoch": 0.3237365624613864, + "grad_norm": 1.1199806793436613, + "learning_rate": 1.9328709880661326e-05, + "loss": 0.3837242126464844, + "step": 655 + }, + { + "epoch": 0.3242308167552206, + "grad_norm": 1.0801449332087112, + "learning_rate": 1.9325763632766916e-05, + "loss": 0.38854193687438965, + "step": 656 + }, + { + "epoch": 0.32472507104905474, + "grad_norm": 1.1330798719469783, + "learning_rate": 1.9322811159125955e-05, + "loss": 0.41792556643486023, + "step": 657 + }, + { + "epoch": 0.32521932534288894, + "grad_norm": 0.9831880252943476, + "learning_rate": 1.931985246170947e-05, + "loss": 0.3968243896961212, + "step": 658 + }, + { + "epoch": 0.3257135796367231, + "grad_norm": 1.0416971268065567, + "learning_rate": 1.9316887542492645e-05, + "loss": 0.41183531284332275, + "step": 659 + }, + { + "epoch": 0.32620783393055724, + "grad_norm": 1.0367106782684, + "learning_rate": 1.931391640345482e-05, + "loss": 0.36057350039482117, + "step": 660 + }, + { + "epoch": 0.32670208822439145, + "grad_norm": 1.0663955736026025, + "learning_rate": 1.9310939046579482e-05, + "loss": 0.36032363772392273, + "step": 661 + }, + { + "epoch": 0.3271963425182256, + "grad_norm": 0.9657326304523917, + "learning_rate": 1.9307955473854275e-05, + "loss": 0.3682931363582611, + "step": 662 + }, + { + "epoch": 0.3276905968120598, + "grad_norm": 1.004896861978755, + "learning_rate": 1.9304965687270987e-05, + "loss": 0.3829198181629181, + "step": 663 + }, + { + "epoch": 0.32818485110589396, + "grad_norm": 1.0180253035605964, + "learning_rate": 1.930196968882556e-05, + "loss": 0.3901137709617615, + "step": 664 + }, + { + "epoch": 0.32867910539972817, + "grad_norm": 0.9037607838463562, + "learning_rate": 1.9298967480518077e-05, + "loss": 0.34352344274520874, + "step": 665 + }, + { + "epoch": 0.3291733596935623, + "grad_norm": 0.9918701152773953, + "learning_rate": 1.9295959064352767e-05, + "loss": 
0.38822662830352783, + "step": 666 + }, + { + "epoch": 0.3296676139873965, + "grad_norm": 0.9619347095581623, + "learning_rate": 1.9292944442338013e-05, + "loss": 0.3639586567878723, + "step": 667 + }, + { + "epoch": 0.3301618682812307, + "grad_norm": 1.0248410702019595, + "learning_rate": 1.9289923616486326e-05, + "loss": 0.38537997007369995, + "step": 668 + }, + { + "epoch": 0.3306561225750649, + "grad_norm": 0.9469693142742907, + "learning_rate": 1.9286896588814373e-05, + "loss": 0.3514263331890106, + "step": 669 + }, + { + "epoch": 0.33115037686889903, + "grad_norm": 0.9776369401143131, + "learning_rate": 1.928386336134295e-05, + "loss": 0.3873803913593292, + "step": 670 + }, + { + "epoch": 0.33164463116273324, + "grad_norm": 1.0063829461952047, + "learning_rate": 1.9280823936096994e-05, + "loss": 0.36644282937049866, + "step": 671 + }, + { + "epoch": 0.3321388854565674, + "grad_norm": 0.8900960907324665, + "learning_rate": 1.9277778315105587e-05, + "loss": 0.34837427735328674, + "step": 672 + }, + { + "epoch": 0.3326331397504016, + "grad_norm": 1.0946494998655654, + "learning_rate": 1.927472650040194e-05, + "loss": 0.3879021406173706, + "step": 673 + }, + { + "epoch": 0.33312739404423575, + "grad_norm": 1.0256193203663788, + "learning_rate": 1.9271668494023404e-05, + "loss": 0.3753926753997803, + "step": 674 + }, + { + "epoch": 0.33362164833806995, + "grad_norm": 1.1193381317991955, + "learning_rate": 1.9268604298011454e-05, + "loss": 0.35362815856933594, + "step": 675 + }, + { + "epoch": 0.3341159026319041, + "grad_norm": 1.0612190451852097, + "learning_rate": 1.926553391441171e-05, + "loss": 0.3685564696788788, + "step": 676 + }, + { + "epoch": 0.3346101569257383, + "grad_norm": 1.2837359031878948, + "learning_rate": 1.926245734527391e-05, + "loss": 0.42326927185058594, + "step": 677 + }, + { + "epoch": 0.33510441121957246, + "grad_norm": 1.0247968871472715, + "learning_rate": 1.925937459265193e-05, + "loss": 0.35918861627578735, + "step": 678 + }, + { + 
"epoch": 0.33559866551340667, + "grad_norm": 1.1358099673309532, + "learning_rate": 1.9256285658603773e-05, + "loss": 0.38703471422195435, + "step": 679 + }, + { + "epoch": 0.3360929198072408, + "grad_norm": 1.0232813577835114, + "learning_rate": 1.9253190545191567e-05, + "loss": 0.3993009924888611, + "step": 680 + }, + { + "epoch": 0.336587174101075, + "grad_norm": 1.178587285681796, + "learning_rate": 1.9250089254481566e-05, + "loss": 0.3998498320579529, + "step": 681 + }, + { + "epoch": 0.3370814283949092, + "grad_norm": 1.0577657705862298, + "learning_rate": 1.9246981788544145e-05, + "loss": 0.37211501598358154, + "step": 682 + }, + { + "epoch": 0.3375756826887434, + "grad_norm": 1.0126592857393306, + "learning_rate": 1.9243868149453806e-05, + "loss": 0.37204745411872864, + "step": 683 + }, + { + "epoch": 0.33806993698257753, + "grad_norm": 0.9626025917248462, + "learning_rate": 1.924074833928917e-05, + "loss": 0.3784663677215576, + "step": 684 + }, + { + "epoch": 0.33856419127641174, + "grad_norm": 1.0085796667337208, + "learning_rate": 1.9237622360132975e-05, + "loss": 0.4140951633453369, + "step": 685 + }, + { + "epoch": 0.3390584455702459, + "grad_norm": 1.0251059918961796, + "learning_rate": 1.9234490214072083e-05, + "loss": 0.3723721504211426, + "step": 686 + }, + { + "epoch": 0.33955269986408004, + "grad_norm": 1.0704762953012439, + "learning_rate": 1.923135190319747e-05, + "loss": 0.3714251220226288, + "step": 687 + }, + { + "epoch": 0.34004695415791425, + "grad_norm": 2.286186750342226, + "learning_rate": 1.9228207429604224e-05, + "loss": 0.3551461696624756, + "step": 688 + }, + { + "epoch": 0.3405412084517484, + "grad_norm": 1.0184392375158444, + "learning_rate": 1.9225056795391554e-05, + "loss": 0.3543378412723541, + "step": 689 + }, + { + "epoch": 0.3410354627455826, + "grad_norm": 0.9670805241747071, + "learning_rate": 1.922190000266278e-05, + "loss": 0.3405894935131073, + "step": 690 + }, + { + "epoch": 0.34152971703941676, + "grad_norm": 
1.0375943311061684, + "learning_rate": 1.9218737053525324e-05, + "loss": 0.36478808522224426, + "step": 691 + }, + { + "epoch": 0.34202397133325096, + "grad_norm": 1.036881907490894, + "learning_rate": 1.9215567950090734e-05, + "loss": 0.39778709411621094, + "step": 692 + }, + { + "epoch": 0.3425182256270851, + "grad_norm": 0.9719804294561131, + "learning_rate": 1.9212392694474654e-05, + "loss": 0.3553788661956787, + "step": 693 + }, + { + "epoch": 0.3430124799209193, + "grad_norm": 1.0265620111261864, + "learning_rate": 1.920921128879684e-05, + "loss": 0.3393115997314453, + "step": 694 + }, + { + "epoch": 0.3435067342147535, + "grad_norm": 1.2003228723584403, + "learning_rate": 1.9206023735181154e-05, + "loss": 0.4240456819534302, + "step": 695 + }, + { + "epoch": 0.3440009885085877, + "grad_norm": 1.0687040296992496, + "learning_rate": 1.920283003575556e-05, + "loss": 0.3451164960861206, + "step": 696 + }, + { + "epoch": 0.34449524280242183, + "grad_norm": 1.0859108204006387, + "learning_rate": 1.919963019265213e-05, + "loss": 0.4328063726425171, + "step": 697 + }, + { + "epoch": 0.34498949709625604, + "grad_norm": 0.9953984300461581, + "learning_rate": 1.9196424208007026e-05, + "loss": 0.35965877771377563, + "step": 698 + }, + { + "epoch": 0.3454837513900902, + "grad_norm": 1.0276560460371096, + "learning_rate": 1.9193212083960522e-05, + "loss": 0.40995267033576965, + "step": 699 + }, + { + "epoch": 0.3459780056839244, + "grad_norm": 1.047717179086883, + "learning_rate": 1.9189993822656984e-05, + "loss": 0.373586505651474, + "step": 700 + }, + { + "epoch": 0.34647225997775855, + "grad_norm": 0.967832395747722, + "learning_rate": 1.918676942624488e-05, + "loss": 0.3651657998561859, + "step": 701 + }, + { + "epoch": 0.34696651427159275, + "grad_norm": 0.9154206667420104, + "learning_rate": 1.918353889687677e-05, + "loss": 0.3333090543746948, + "step": 702 + }, + { + "epoch": 0.3474607685654269, + "grad_norm": 1.109347895406641, + "learning_rate": 
1.9180302236709312e-05, + "loss": 0.444000780582428, + "step": 703 + }, + { + "epoch": 0.3479550228592611, + "grad_norm": 0.9543494832625998, + "learning_rate": 1.917705944790325e-05, + "loss": 0.34942537546157837, + "step": 704 + }, + { + "epoch": 0.34844927715309526, + "grad_norm": 1.206317081042567, + "learning_rate": 1.9173810532623425e-05, + "loss": 0.4709789752960205, + "step": 705 + }, + { + "epoch": 0.34894353144692947, + "grad_norm": 1.0126287373930702, + "learning_rate": 1.917055549303877e-05, + "loss": 0.3615723252296448, + "step": 706 + }, + { + "epoch": 0.3494377857407636, + "grad_norm": 0.98553805717422, + "learning_rate": 1.9167294331322293e-05, + "loss": 0.366035133600235, + "step": 707 + }, + { + "epoch": 0.3499320400345978, + "grad_norm": 1.085095649211616, + "learning_rate": 1.9164027049651105e-05, + "loss": 0.3916548490524292, + "step": 708 + }, + { + "epoch": 0.350426294328432, + "grad_norm": 1.0423550617328055, + "learning_rate": 1.91607536502064e-05, + "loss": 0.3752925992012024, + "step": 709 + }, + { + "epoch": 0.3509205486222662, + "grad_norm": 1.0859051595052658, + "learning_rate": 1.9157474135173448e-05, + "loss": 0.3471261262893677, + "step": 710 + }, + { + "epoch": 0.35141480291610033, + "grad_norm": 1.0011428490015388, + "learning_rate": 1.9154188506741605e-05, + "loss": 0.36898115277290344, + "step": 711 + }, + { + "epoch": 0.3519090572099345, + "grad_norm": 1.0150877470647623, + "learning_rate": 1.9150896767104315e-05, + "loss": 0.38236287236213684, + "step": 712 + }, + { + "epoch": 0.3524033115037687, + "grad_norm": 1.0813644645593066, + "learning_rate": 1.9147598918459096e-05, + "loss": 0.39260241389274597, + "step": 713 + }, + { + "epoch": 0.35289756579760284, + "grad_norm": 0.988095993083205, + "learning_rate": 1.9144294963007542e-05, + "loss": 0.3699083626270294, + "step": 714 + }, + { + "epoch": 0.35339182009143705, + "grad_norm": 0.9649609380548236, + "learning_rate": 1.914098490295532e-05, + "loss": 0.37720543146133423, + 
"step": 715 + }, + { + "epoch": 0.3538860743852712, + "grad_norm": 0.8834082509396699, + "learning_rate": 1.9137668740512195e-05, + "loss": 0.298441082239151, + "step": 716 + }, + { + "epoch": 0.3543803286791054, + "grad_norm": 0.996165149875045, + "learning_rate": 1.913434647789197e-05, + "loss": 0.3867550194263458, + "step": 717 + }, + { + "epoch": 0.35487458297293956, + "grad_norm": 0.9824732772890364, + "learning_rate": 1.913101811731256e-05, + "loss": 0.37111300230026245, + "step": 718 + }, + { + "epoch": 0.35536883726677376, + "grad_norm": 0.9874274570055057, + "learning_rate": 1.9127683660995916e-05, + "loss": 0.3922812342643738, + "step": 719 + }, + { + "epoch": 0.3558630915606079, + "grad_norm": 1.0744489462576237, + "learning_rate": 1.9124343111168077e-05, + "loss": 0.3878915309906006, + "step": 720 + }, + { + "epoch": 0.3563573458544421, + "grad_norm": 0.9551023310729483, + "learning_rate": 1.9120996470059153e-05, + "loss": 0.34974879026412964, + "step": 721 + }, + { + "epoch": 0.35685160014827627, + "grad_norm": 1.0403250728390605, + "learning_rate": 1.9117643739903306e-05, + "loss": 0.38341426849365234, + "step": 722 + }, + { + "epoch": 0.3573458544421105, + "grad_norm": 0.9876921724558848, + "learning_rate": 1.9114284922938772e-05, + "loss": 0.32610252499580383, + "step": 723 + }, + { + "epoch": 0.35784010873594463, + "grad_norm": 1.0486464385186933, + "learning_rate": 1.9110920021407855e-05, + "loss": 0.37203550338745117, + "step": 724 + }, + { + "epoch": 0.35833436302977884, + "grad_norm": 1.0809240289061282, + "learning_rate": 1.9107549037556906e-05, + "loss": 0.2954786419868469, + "step": 725 + }, + { + "epoch": 0.358828617323613, + "grad_norm": 0.9795897601711951, + "learning_rate": 1.9104171973636353e-05, + "loss": 0.33074450492858887, + "step": 726 + }, + { + "epoch": 0.3593228716174472, + "grad_norm": 1.0341587070514209, + "learning_rate": 1.9100788831900676e-05, + "loss": 0.350687712430954, + "step": 727 + }, + { + "epoch": 
0.35981712591128134, + "grad_norm": 1.143909518582956, + "learning_rate": 1.9097399614608406e-05, + "loss": 0.3635619878768921, + "step": 728 + }, + { + "epoch": 0.36031138020511555, + "grad_norm": 1.0607740871884148, + "learning_rate": 1.909400432402214e-05, + "loss": 0.36409544944763184, + "step": 729 + }, + { + "epoch": 0.3608056344989497, + "grad_norm": 1.069313873032721, + "learning_rate": 1.9090602962408523e-05, + "loss": 0.4109501540660858, + "step": 730 + }, + { + "epoch": 0.3612998887927839, + "grad_norm": 1.0147750628685799, + "learning_rate": 1.908719553203826e-05, + "loss": 0.337943971157074, + "step": 731 + }, + { + "epoch": 0.36179414308661806, + "grad_norm": 1.0957860180414656, + "learning_rate": 1.9083782035186097e-05, + "loss": 0.36411553621292114, + "step": 732 + }, + { + "epoch": 0.36228839738045227, + "grad_norm": 1.1570738944902594, + "learning_rate": 1.908036247413084e-05, + "loss": 0.3513786494731903, + "step": 733 + }, + { + "epoch": 0.3627826516742864, + "grad_norm": 1.156885907892102, + "learning_rate": 1.907693685115534e-05, + "loss": 0.4017047584056854, + "step": 734 + }, + { + "epoch": 0.3632769059681206, + "grad_norm": 1.0932284273900412, + "learning_rate": 1.907350516854649e-05, + "loss": 0.3780835270881653, + "step": 735 + }, + { + "epoch": 0.3637711602619548, + "grad_norm": 1.10688269569213, + "learning_rate": 1.9070067428595234e-05, + "loss": 0.35562777519226074, + "step": 736 + }, + { + "epoch": 0.364265414555789, + "grad_norm": 1.0784034928358046, + "learning_rate": 1.9066623633596556e-05, + "loss": 0.34880492091178894, + "step": 737 + }, + { + "epoch": 0.36475966884962313, + "grad_norm": 1.1213824671894879, + "learning_rate": 1.9063173785849488e-05, + "loss": 0.3798677921295166, + "step": 738 + }, + { + "epoch": 0.3652539231434573, + "grad_norm": 1.0300538330170659, + "learning_rate": 1.9059717887657098e-05, + "loss": 0.371119886636734, + "step": 739 + }, + { + "epoch": 0.3657481774372915, + "grad_norm": 1.075537593372937, + 
"learning_rate": 1.9056255941326497e-05, + "loss": 0.3845891058444977, + "step": 740 + }, + { + "epoch": 0.36624243173112564, + "grad_norm": 1.0460904589757556, + "learning_rate": 1.9052787949168823e-05, + "loss": 0.34627166390419006, + "step": 741 + }, + { + "epoch": 0.36673668602495985, + "grad_norm": 1.0588032623720978, + "learning_rate": 1.9049313913499266e-05, + "loss": 0.3872081935405731, + "step": 742 + }, + { + "epoch": 0.367230940318794, + "grad_norm": 1.0173727289332204, + "learning_rate": 1.9045833836637038e-05, + "loss": 0.40446269512176514, + "step": 743 + }, + { + "epoch": 0.3677251946126282, + "grad_norm": 0.9672045860873493, + "learning_rate": 1.904234772090539e-05, + "loss": 0.3421085476875305, + "step": 744 + }, + { + "epoch": 0.36821944890646235, + "grad_norm": 0.9886363928023795, + "learning_rate": 1.90388555686316e-05, + "loss": 0.3626730442047119, + "step": 745 + }, + { + "epoch": 0.36871370320029656, + "grad_norm": 0.9308335236520315, + "learning_rate": 1.9035357382146984e-05, + "loss": 0.338506281375885, + "step": 746 + }, + { + "epoch": 0.3692079574941307, + "grad_norm": 1.010277605498289, + "learning_rate": 1.903185316378688e-05, + "loss": 0.3709959089756012, + "step": 747 + }, + { + "epoch": 0.3697022117879649, + "grad_norm": 1.0369282663858728, + "learning_rate": 1.9028342915890655e-05, + "loss": 0.3804059624671936, + "step": 748 + }, + { + "epoch": 0.37019646608179907, + "grad_norm": 1.0305613800678137, + "learning_rate": 1.9024826640801694e-05, + "loss": 0.3416539132595062, + "step": 749 + }, + { + "epoch": 0.3706907203756333, + "grad_norm": 1.0119233680399335, + "learning_rate": 1.9021304340867418e-05, + "loss": 0.3642072081565857, + "step": 750 + }, + { + "epoch": 0.3711849746694674, + "grad_norm": 0.9749783281253589, + "learning_rate": 1.9017776018439267e-05, + "loss": 0.35957199335098267, + "step": 751 + }, + { + "epoch": 0.37167922896330163, + "grad_norm": 1.1539382067501942, + "learning_rate": 1.9014241675872692e-05, + "loss": 
0.38497287034988403, + "step": 752 + }, + { + "epoch": 0.3721734832571358, + "grad_norm": 1.1731793747690833, + "learning_rate": 1.9010701315527173e-05, + "loss": 0.40713614225387573, + "step": 753 + }, + { + "epoch": 0.37266773755097, + "grad_norm": 1.0417857344342851, + "learning_rate": 1.9007154939766196e-05, + "loss": 0.35115551948547363, + "step": 754 + }, + { + "epoch": 0.37316199184480414, + "grad_norm": 0.958988647508799, + "learning_rate": 1.9003602550957284e-05, + "loss": 0.3478096127510071, + "step": 755 + }, + { + "epoch": 0.37365624613863835, + "grad_norm": 1.040896998789985, + "learning_rate": 1.9000044151471956e-05, + "loss": 0.36460641026496887, + "step": 756 + }, + { + "epoch": 0.3741505004324725, + "grad_norm": 1.1161707385765272, + "learning_rate": 1.8996479743685745e-05, + "loss": 0.38015758991241455, + "step": 757 + }, + { + "epoch": 0.3746447547263067, + "grad_norm": 1.1039269634713542, + "learning_rate": 1.8992909329978202e-05, + "loss": 0.35270214080810547, + "step": 758 + }, + { + "epoch": 0.37513900902014086, + "grad_norm": 1.0025131869881447, + "learning_rate": 1.8989332912732884e-05, + "loss": 0.3875473439693451, + "step": 759 + }, + { + "epoch": 0.37563326331397506, + "grad_norm": 1.0209812095079043, + "learning_rate": 1.8985750494337353e-05, + "loss": 0.3281819820404053, + "step": 760 + }, + { + "epoch": 0.3761275176078092, + "grad_norm": 1.2490133288735825, + "learning_rate": 1.8982162077183182e-05, + "loss": 0.4081311821937561, + "step": 761 + }, + { + "epoch": 0.3766217719016434, + "grad_norm": 1.2134865751354402, + "learning_rate": 1.897856766366595e-05, + "loss": 0.3546852469444275, + "step": 762 + }, + { + "epoch": 0.37711602619547757, + "grad_norm": 0.9620958606777789, + "learning_rate": 1.8974967256185234e-05, + "loss": 0.3177235424518585, + "step": 763 + }, + { + "epoch": 0.3776102804893118, + "grad_norm": 1.0401218813843935, + "learning_rate": 1.8971360857144616e-05, + "loss": 0.3739625811576843, + "step": 764 + }, + { + 
"epoch": 0.37810453478314593, + "grad_norm": 0.9714277368627854, + "learning_rate": 1.8967748468951673e-05, + "loss": 0.32039010524749756, + "step": 765 + }, + { + "epoch": 0.3785987890769801, + "grad_norm": 1.0178844258047104, + "learning_rate": 1.8964130094017986e-05, + "loss": 0.3237234354019165, + "step": 766 + }, + { + "epoch": 0.3790930433708143, + "grad_norm": 1.0589536664735313, + "learning_rate": 1.896050573475913e-05, + "loss": 0.33864307403564453, + "step": 767 + }, + { + "epoch": 0.37958729766464844, + "grad_norm": 1.076259010215984, + "learning_rate": 1.8956875393594675e-05, + "loss": 0.40412086248397827, + "step": 768 + }, + { + "epoch": 0.38008155195848264, + "grad_norm": 1.049114130745209, + "learning_rate": 1.8953239072948185e-05, + "loss": 0.37689530849456787, + "step": 769 + }, + { + "epoch": 0.3805758062523168, + "grad_norm": 1.1429748380406861, + "learning_rate": 1.8949596775247215e-05, + "loss": 0.3632664680480957, + "step": 770 + }, + { + "epoch": 0.381070060546151, + "grad_norm": 1.0707340379824546, + "learning_rate": 1.8945948502923314e-05, + "loss": 0.384027361869812, + "step": 771 + }, + { + "epoch": 0.38156431483998515, + "grad_norm": 1.0884709757767692, + "learning_rate": 1.8942294258412012e-05, + "loss": 0.37623292207717896, + "step": 772 + }, + { + "epoch": 0.38205856913381936, + "grad_norm": 0.9918916696644151, + "learning_rate": 1.8938634044152837e-05, + "loss": 0.3449557423591614, + "step": 773 + }, + { + "epoch": 0.3825528234276535, + "grad_norm": 1.0216495444427651, + "learning_rate": 1.8934967862589287e-05, + "loss": 0.37977170944213867, + "step": 774 + }, + { + "epoch": 0.3830470777214877, + "grad_norm": 1.035626875821766, + "learning_rate": 1.893129571616886e-05, + "loss": 0.3535463809967041, + "step": 775 + }, + { + "epoch": 0.38354133201532187, + "grad_norm": 0.9784961361645077, + "learning_rate": 1.8927617607343024e-05, + "loss": 0.3107556104660034, + "step": 776 + }, + { + "epoch": 0.3840355863091561, + "grad_norm": 
0.9647734455274504, + "learning_rate": 1.8923933538567238e-05, + "loss": 0.33028605580329895, + "step": 777 + }, + { + "epoch": 0.3845298406029902, + "grad_norm": 1.0880250729774004, + "learning_rate": 1.8920243512300925e-05, + "loss": 0.35947421193122864, + "step": 778 + }, + { + "epoch": 0.38502409489682443, + "grad_norm": 1.1225656593555045, + "learning_rate": 1.89165475310075e-05, + "loss": 0.36262935400009155, + "step": 779 + }, + { + "epoch": 0.3855183491906586, + "grad_norm": 0.9595574558826961, + "learning_rate": 1.8912845597154344e-05, + "loss": 0.3441828489303589, + "step": 780 + }, + { + "epoch": 0.3860126034844928, + "grad_norm": 1.1060761912194574, + "learning_rate": 1.8909137713212813e-05, + "loss": 0.3748928904533386, + "step": 781 + }, + { + "epoch": 0.38650685777832694, + "grad_norm": 1.0401989681427097, + "learning_rate": 1.8905423881658248e-05, + "loss": 0.3571966588497162, + "step": 782 + }, + { + "epoch": 0.38700111207216115, + "grad_norm": 1.0661600684644588, + "learning_rate": 1.8901704104969937e-05, + "loss": 0.3937920331954956, + "step": 783 + }, + { + "epoch": 0.3874953663659953, + "grad_norm": 1.036207969764135, + "learning_rate": 1.8897978385631157e-05, + "loss": 0.3641708493232727, + "step": 784 + }, + { + "epoch": 0.3879896206598295, + "grad_norm": 1.0259735566777997, + "learning_rate": 1.8894246726129143e-05, + "loss": 0.33510833978652954, + "step": 785 + }, + { + "epoch": 0.38848387495366365, + "grad_norm": 1.0496886995032506, + "learning_rate": 1.88905091289551e-05, + "loss": 0.3553236722946167, + "step": 786 + }, + { + "epoch": 0.38897812924749786, + "grad_norm": 1.1065055000350301, + "learning_rate": 1.8886765596604188e-05, + "loss": 0.3802195191383362, + "step": 787 + }, + { + "epoch": 0.389472383541332, + "grad_norm": 1.0233155379560877, + "learning_rate": 1.8883016131575546e-05, + "loss": 0.3672805726528168, + "step": 788 + }, + { + "epoch": 0.3899666378351662, + "grad_norm": 1.1021600101810725, + "learning_rate": 
1.887926073637225e-05, + "loss": 0.35715609788894653, + "step": 789 + }, + { + "epoch": 0.39046089212900037, + "grad_norm": 1.0669470229074853, + "learning_rate": 1.8875499413501362e-05, + "loss": 0.3800659775733948, + "step": 790 + }, + { + "epoch": 0.3909551464228345, + "grad_norm": 1.0110531011706714, + "learning_rate": 1.8871732165473878e-05, + "loss": 0.36886462569236755, + "step": 791 + }, + { + "epoch": 0.3914494007166687, + "grad_norm": 1.1716485087298352, + "learning_rate": 1.886795899480476e-05, + "loss": 0.37373536825180054, + "step": 792 + }, + { + "epoch": 0.3919436550105029, + "grad_norm": 1.1804493539453536, + "learning_rate": 1.8864179904012932e-05, + "loss": 0.4016551375389099, + "step": 793 + }, + { + "epoch": 0.3924379093043371, + "grad_norm": 1.3227573763511704, + "learning_rate": 1.886039489562125e-05, + "loss": 0.35107535123825073, + "step": 794 + }, + { + "epoch": 0.39293216359817124, + "grad_norm": 1.0690764214154878, + "learning_rate": 1.8856603972156532e-05, + "loss": 0.36280331015586853, + "step": 795 + }, + { + "epoch": 0.39342641789200544, + "grad_norm": 1.0372650355149657, + "learning_rate": 1.885280713614955e-05, + "loss": 0.3417884111404419, + "step": 796 + }, + { + "epoch": 0.3939206721858396, + "grad_norm": 1.0135638633522712, + "learning_rate": 1.8849004390135017e-05, + "loss": 0.3257544934749603, + "step": 797 + }, + { + "epoch": 0.3944149264796738, + "grad_norm": 1.138312578356034, + "learning_rate": 1.8845195736651588e-05, + "loss": 0.3694860339164734, + "step": 798 + }, + { + "epoch": 0.39490918077350795, + "grad_norm": 1.0432466517484986, + "learning_rate": 1.8841381178241865e-05, + "loss": 0.37279266119003296, + "step": 799 + }, + { + "epoch": 0.39540343506734216, + "grad_norm": 1.023281980764518, + "learning_rate": 1.88375607174524e-05, + "loss": 0.38758352398872375, + "step": 800 + }, + { + "epoch": 0.3958976893611763, + "grad_norm": 1.0321652923702807, + "learning_rate": 1.883373435683367e-05, + "loss": 
0.34098950028419495, + "step": 801 + }, + { + "epoch": 0.3963919436550105, + "grad_norm": 1.0256865325574602, + "learning_rate": 1.8829902098940105e-05, + "loss": 0.3278653621673584, + "step": 802 + }, + { + "epoch": 0.39688619794884467, + "grad_norm": 1.1042531688452888, + "learning_rate": 1.8826063946330065e-05, + "loss": 0.3673133850097656, + "step": 803 + }, + { + "epoch": 0.39738045224267887, + "grad_norm": 0.9510108180701087, + "learning_rate": 1.882221990156584e-05, + "loss": 0.37917453050613403, + "step": 804 + }, + { + "epoch": 0.397874706536513, + "grad_norm": 0.9926574292369763, + "learning_rate": 1.8818369967213662e-05, + "loss": 0.33986327052116394, + "step": 805 + }, + { + "epoch": 0.39836896083034723, + "grad_norm": 1.0256369099360807, + "learning_rate": 1.8814514145843694e-05, + "loss": 0.34402647614479065, + "step": 806 + }, + { + "epoch": 0.3988632151241814, + "grad_norm": 1.0984836868071073, + "learning_rate": 1.8810652440030026e-05, + "loss": 0.32781803607940674, + "step": 807 + }, + { + "epoch": 0.3993574694180156, + "grad_norm": 1.063630501097469, + "learning_rate": 1.8806784852350678e-05, + "loss": 0.35807961225509644, + "step": 808 + }, + { + "epoch": 0.39985172371184974, + "grad_norm": 1.0130076092125457, + "learning_rate": 1.8802911385387596e-05, + "loss": 0.33577096462249756, + "step": 809 + }, + { + "epoch": 0.40034597800568394, + "grad_norm": 1.205635135602797, + "learning_rate": 1.8799032041726654e-05, + "loss": 0.37786391377449036, + "step": 810 + }, + { + "epoch": 0.4008402322995181, + "grad_norm": 1.0055899694647235, + "learning_rate": 1.879514682395764e-05, + "loss": 0.3237725496292114, + "step": 811 + }, + { + "epoch": 0.4013344865933523, + "grad_norm": 1.0557641796624602, + "learning_rate": 1.8791255734674275e-05, + "loss": 0.29552844166755676, + "step": 812 + }, + { + "epoch": 0.40182874088718645, + "grad_norm": 1.0675222800328668, + "learning_rate": 1.8787358776474192e-05, + "loss": 0.40317612886428833, + "step": 813 + }, + { + 
"epoch": 0.40232299518102066, + "grad_norm": 1.0548023053217102, + "learning_rate": 1.8783455951958948e-05, + "loss": 0.33383694291114807, + "step": 814 + }, + { + "epoch": 0.4028172494748548, + "grad_norm": 1.0255061005640398, + "learning_rate": 1.8779547263734012e-05, + "loss": 0.35020262002944946, + "step": 815 + }, + { + "epoch": 0.403311503768689, + "grad_norm": 1.098709822155027, + "learning_rate": 1.8775632714408765e-05, + "loss": 0.3742774724960327, + "step": 816 + }, + { + "epoch": 0.40380575806252317, + "grad_norm": 0.9986084839363315, + "learning_rate": 1.8771712306596506e-05, + "loss": 0.35037580132484436, + "step": 817 + }, + { + "epoch": 0.4043000123563573, + "grad_norm": 1.078218018297503, + "learning_rate": 1.8767786042914445e-05, + "loss": 0.3416820168495178, + "step": 818 + }, + { + "epoch": 0.4047942666501915, + "grad_norm": 1.0398523365943921, + "learning_rate": 1.8763853925983695e-05, + "loss": 0.33287927508354187, + "step": 819 + }, + { + "epoch": 0.4052885209440257, + "grad_norm": 1.031774367057856, + "learning_rate": 1.875991595842929e-05, + "loss": 0.3493141531944275, + "step": 820 + }, + { + "epoch": 0.4057827752378599, + "grad_norm": 1.1647269737420223, + "learning_rate": 1.875597214288015e-05, + "loss": 0.4184780418872833, + "step": 821 + }, + { + "epoch": 0.40627702953169403, + "grad_norm": 1.0098974718957208, + "learning_rate": 1.8752022481969116e-05, + "loss": 0.33189794421195984, + "step": 822 + }, + { + "epoch": 0.40677128382552824, + "grad_norm": 1.1012026040533913, + "learning_rate": 1.8748066978332925e-05, + "loss": 0.35339856147766113, + "step": 823 + }, + { + "epoch": 0.4072655381193624, + "grad_norm": 0.990995886573267, + "learning_rate": 1.874410563461221e-05, + "loss": 0.3766328692436218, + "step": 824 + }, + { + "epoch": 0.4077597924131966, + "grad_norm": 1.023451056136873, + "learning_rate": 1.874013845345152e-05, + "loss": 0.32575076818466187, + "step": 825 + }, + { + "epoch": 0.40825404670703075, + "grad_norm": 
0.9933822197860499, + "learning_rate": 1.8736165437499273e-05, + "loss": 0.3417864441871643, + "step": 826 + }, + { + "epoch": 0.40874830100086496, + "grad_norm": 1.053854919420327, + "learning_rate": 1.8732186589407807e-05, + "loss": 0.3636544942855835, + "step": 827 + }, + { + "epoch": 0.4092425552946991, + "grad_norm": 1.0398605740994966, + "learning_rate": 1.872820191183334e-05, + "loss": 0.38730406761169434, + "step": 828 + }, + { + "epoch": 0.4097368095885333, + "grad_norm": 1.031894160648423, + "learning_rate": 1.872421140743599e-05, + "loss": 0.3593043088912964, + "step": 829 + }, + { + "epoch": 0.41023106388236746, + "grad_norm": 1.046860972263581, + "learning_rate": 1.872021507887976e-05, + "loss": 0.39092978835105896, + "step": 830 + }, + { + "epoch": 0.41072531817620167, + "grad_norm": 1.1607362555786684, + "learning_rate": 1.8716212928832537e-05, + "loss": 0.3745616674423218, + "step": 831 + }, + { + "epoch": 0.4112195724700358, + "grad_norm": 1.1451994826740608, + "learning_rate": 1.87122049599661e-05, + "loss": 0.39571845531463623, + "step": 832 + }, + { + "epoch": 0.41171382676387, + "grad_norm": 1.0987542615004384, + "learning_rate": 1.8708191174956116e-05, + "loss": 0.35459476709365845, + "step": 833 + }, + { + "epoch": 0.4122080810577042, + "grad_norm": 1.1159636372579822, + "learning_rate": 1.870417157648213e-05, + "loss": 0.38937896490097046, + "step": 834 + }, + { + "epoch": 0.4127023353515384, + "grad_norm": 1.002441779942121, + "learning_rate": 1.8700146167227563e-05, + "loss": 0.33595120906829834, + "step": 835 + }, + { + "epoch": 0.41319658964537254, + "grad_norm": 0.9899088387295479, + "learning_rate": 1.869611494987973e-05, + "loss": 0.332889199256897, + "step": 836 + }, + { + "epoch": 0.41369084393920674, + "grad_norm": 1.0005984941908395, + "learning_rate": 1.8692077927129803e-05, + "loss": 0.333438515663147, + "step": 837 + }, + { + "epoch": 0.4141850982330409, + "grad_norm": 0.9672990037342486, + "learning_rate": 
1.868803510167285e-05, + "loss": 0.30645743012428284, + "step": 838 + }, + { + "epoch": 0.4146793525268751, + "grad_norm": 1.0166404987540014, + "learning_rate": 1.86839864762078e-05, + "loss": 0.3333967924118042, + "step": 839 + }, + { + "epoch": 0.41517360682070925, + "grad_norm": 1.1324675944020866, + "learning_rate": 1.867993205343746e-05, + "loss": 0.36230576038360596, + "step": 840 + }, + { + "epoch": 0.41566786111454346, + "grad_norm": 1.4565152055506116, + "learning_rate": 1.8675871836068498e-05, + "loss": 0.34191709756851196, + "step": 841 + }, + { + "epoch": 0.4161621154083776, + "grad_norm": 1.1876819294674656, + "learning_rate": 1.8671805826811462e-05, + "loss": 0.3115188479423523, + "step": 842 + }, + { + "epoch": 0.4166563697022118, + "grad_norm": 1.023080563524472, + "learning_rate": 1.866773402838076e-05, + "loss": 0.3725768029689789, + "step": 843 + }, + { + "epoch": 0.41715062399604597, + "grad_norm": 1.1051799194693688, + "learning_rate": 1.8663656443494673e-05, + "loss": 0.376983642578125, + "step": 844 + }, + { + "epoch": 0.4176448782898801, + "grad_norm": 1.0101343157113072, + "learning_rate": 1.8659573074875327e-05, + "loss": 0.31490784883499146, + "step": 845 + }, + { + "epoch": 0.4181391325837143, + "grad_norm": 1.0250002510666845, + "learning_rate": 1.8655483925248727e-05, + "loss": 0.3533504605293274, + "step": 846 + }, + { + "epoch": 0.4186333868775485, + "grad_norm": 1.090746715781531, + "learning_rate": 1.8651388997344734e-05, + "loss": 0.3282274305820465, + "step": 847 + }, + { + "epoch": 0.4191276411713827, + "grad_norm": 1.1145704933282803, + "learning_rate": 1.8647288293897055e-05, + "loss": 0.32892414927482605, + "step": 848 + }, + { + "epoch": 0.41962189546521683, + "grad_norm": 1.1451436882679205, + "learning_rate": 1.864318181764327e-05, + "loss": 0.40414246916770935, + "step": 849 + }, + { + "epoch": 0.42011614975905104, + "grad_norm": 0.9874933781402742, + "learning_rate": 1.8639069571324798e-05, + "loss": 
0.30335378646850586, + "step": 850 + }, + { + "epoch": 0.4206104040528852, + "grad_norm": 1.0390790492756226, + "learning_rate": 1.863495155768692e-05, + "loss": 0.311710000038147, + "step": 851 + }, + { + "epoch": 0.4211046583467194, + "grad_norm": 1.1685121542837038, + "learning_rate": 1.8630827779478755e-05, + "loss": 0.37345218658447266, + "step": 852 + }, + { + "epoch": 0.42159891264055355, + "grad_norm": 1.118375459884757, + "learning_rate": 1.8626698239453287e-05, + "loss": 0.37286317348480225, + "step": 853 + }, + { + "epoch": 0.42209316693438775, + "grad_norm": 1.061435107804804, + "learning_rate": 1.8622562940367335e-05, + "loss": 0.3706691861152649, + "step": 854 + }, + { + "epoch": 0.4225874212282219, + "grad_norm": 1.045639661440086, + "learning_rate": 1.8618421884981567e-05, + "loss": 0.30183354020118713, + "step": 855 + }, + { + "epoch": 0.4230816755220561, + "grad_norm": 0.9282918926966607, + "learning_rate": 1.8614275076060486e-05, + "loss": 0.32329827547073364, + "step": 856 + }, + { + "epoch": 0.42357592981589026, + "grad_norm": 0.9823332197669685, + "learning_rate": 1.861012251637245e-05, + "loss": 0.39380010962486267, + "step": 857 + }, + { + "epoch": 0.42407018410972447, + "grad_norm": 1.2258684110272524, + "learning_rate": 1.8605964208689646e-05, + "loss": 0.41745316982269287, + "step": 858 + }, + { + "epoch": 0.4245644384035586, + "grad_norm": 1.0539643629085786, + "learning_rate": 1.86018001557881e-05, + "loss": 0.36751389503479004, + "step": 859 + }, + { + "epoch": 0.4250586926973928, + "grad_norm": 1.052378043397748, + "learning_rate": 1.8597630360447673e-05, + "loss": 0.36876100301742554, + "step": 860 + }, + { + "epoch": 0.425552946991227, + "grad_norm": 1.0649813734142937, + "learning_rate": 1.8593454825452067e-05, + "loss": 0.3473365306854248, + "step": 861 + }, + { + "epoch": 0.4260472012850612, + "grad_norm": 1.0186749062796028, + "learning_rate": 1.8589273553588802e-05, + "loss": 0.3429828882217407, + "step": 862 + }, + { + 
"epoch": 0.42654145557889533, + "grad_norm": 0.9471164855143414, + "learning_rate": 1.8585086547649238e-05, + "loss": 0.3424219787120819, + "step": 863 + }, + { + "epoch": 0.42703570987272954, + "grad_norm": 1.002345729786534, + "learning_rate": 1.8580893810428562e-05, + "loss": 0.32187891006469727, + "step": 864 + }, + { + "epoch": 0.4275299641665637, + "grad_norm": 0.997893238522563, + "learning_rate": 1.8576695344725785e-05, + "loss": 0.3116072416305542, + "step": 865 + }, + { + "epoch": 0.4280242184603979, + "grad_norm": 0.9198063604105835, + "learning_rate": 1.8572491153343742e-05, + "loss": 0.32645124197006226, + "step": 866 + }, + { + "epoch": 0.42851847275423205, + "grad_norm": 1.0827892730720303, + "learning_rate": 1.8568281239089088e-05, + "loss": 0.36861616373062134, + "step": 867 + }, + { + "epoch": 0.42901272704806626, + "grad_norm": 1.05561333743087, + "learning_rate": 1.8564065604772307e-05, + "loss": 0.38477885723114014, + "step": 868 + }, + { + "epoch": 0.4295069813419004, + "grad_norm": 1.1711610330815532, + "learning_rate": 1.8559844253207694e-05, + "loss": 0.352588951587677, + "step": 869 + }, + { + "epoch": 0.43000123563573456, + "grad_norm": 1.1459489566657088, + "learning_rate": 1.8555617187213362e-05, + "loss": 0.43443864583969116, + "step": 870 + }, + { + "epoch": 0.43049548992956876, + "grad_norm": 1.1608032541581428, + "learning_rate": 1.8551384409611238e-05, + "loss": 0.37355685234069824, + "step": 871 + }, + { + "epoch": 0.4309897442234029, + "grad_norm": 1.120838755410591, + "learning_rate": 1.854714592322707e-05, + "loss": 0.3529026508331299, + "step": 872 + }, + { + "epoch": 0.4314839985172371, + "grad_norm": 1.031744932760461, + "learning_rate": 1.854290173089041e-05, + "loss": 0.3278823494911194, + "step": 873 + }, + { + "epoch": 0.4319782528110713, + "grad_norm": 1.045846838310407, + "learning_rate": 1.8538651835434615e-05, + "loss": 0.3677588999271393, + "step": 874 + }, + { + "epoch": 0.4324725071049055, + "grad_norm": 
0.9726822011565114, + "learning_rate": 1.8534396239696852e-05, + "loss": 0.34132176637649536, + "step": 875 + }, + { + "epoch": 0.43296676139873963, + "grad_norm": 0.967842291132869, + "learning_rate": 1.8530134946518106e-05, + "loss": 0.3329963684082031, + "step": 876 + }, + { + "epoch": 0.43346101569257384, + "grad_norm": 1.1447169522915757, + "learning_rate": 1.852586795874315e-05, + "loss": 0.38435080647468567, + "step": 877 + }, + { + "epoch": 0.433955269986408, + "grad_norm": 1.076068410050275, + "learning_rate": 1.8521595279220564e-05, + "loss": 0.3737541735172272, + "step": 878 + }, + { + "epoch": 0.4344495242802422, + "grad_norm": 1.0947429210573731, + "learning_rate": 1.851731691080273e-05, + "loss": 0.3676382303237915, + "step": 879 + }, + { + "epoch": 0.43494377857407635, + "grad_norm": 0.9624268111771948, + "learning_rate": 1.8513032856345825e-05, + "loss": 0.317960262298584, + "step": 880 + }, + { + "epoch": 0.43543803286791055, + "grad_norm": 1.040958800557315, + "learning_rate": 1.8508743118709816e-05, + "loss": 0.38857966661453247, + "step": 881 + }, + { + "epoch": 0.4359322871617447, + "grad_norm": 1.0694529449199925, + "learning_rate": 1.8504447700758482e-05, + "loss": 0.33234506845474243, + "step": 882 + }, + { + "epoch": 0.4364265414555789, + "grad_norm": 1.0262098516685678, + "learning_rate": 1.8500146605359375e-05, + "loss": 0.3380611538887024, + "step": 883 + }, + { + "epoch": 0.43692079574941306, + "grad_norm": 1.032922511494617, + "learning_rate": 1.8495839835383845e-05, + "loss": 0.36386823654174805, + "step": 884 + }, + { + "epoch": 0.43741505004324727, + "grad_norm": 1.0814661245803954, + "learning_rate": 1.849152739370703e-05, + "loss": 0.34711897373199463, + "step": 885 + }, + { + "epoch": 0.4379093043370814, + "grad_norm": 1.1112439466083954, + "learning_rate": 1.848720928320786e-05, + "loss": 0.3861457109451294, + "step": 886 + }, + { + "epoch": 0.4384035586309156, + "grad_norm": 1.0062524071684966, + "learning_rate": 
1.848288550676904e-05, + "loss": 0.3387115001678467, + "step": 887 + }, + { + "epoch": 0.4388978129247498, + "grad_norm": 1.119801920916648, + "learning_rate": 1.847855606727706e-05, + "loss": 0.3419748842716217, + "step": 888 + }, + { + "epoch": 0.439392067218584, + "grad_norm": 1.1162084355940824, + "learning_rate": 1.847422096762219e-05, + "loss": 0.38184499740600586, + "step": 889 + }, + { + "epoch": 0.43988632151241813, + "grad_norm": 1.1974191241625343, + "learning_rate": 1.846988021069849e-05, + "loss": 0.3845345973968506, + "step": 890 + }, + { + "epoch": 0.44038057580625234, + "grad_norm": 1.035257767207683, + "learning_rate": 1.8465533799403778e-05, + "loss": 0.31854647397994995, + "step": 891 + }, + { + "epoch": 0.4408748301000865, + "grad_norm": 1.2150547461116588, + "learning_rate": 1.8461181736639658e-05, + "loss": 0.3940027356147766, + "step": 892 + }, + { + "epoch": 0.4413690843939207, + "grad_norm": 1.0827124100419134, + "learning_rate": 1.8456824025311508e-05, + "loss": 0.3580612540245056, + "step": 893 + }, + { + "epoch": 0.44186333868775485, + "grad_norm": 1.0457692243819372, + "learning_rate": 1.8452460668328474e-05, + "loss": 0.3662642240524292, + "step": 894 + }, + { + "epoch": 0.44235759298158905, + "grad_norm": 1.3135451040729966, + "learning_rate": 1.8448091668603464e-05, + "loss": 0.29031360149383545, + "step": 895 + }, + { + "epoch": 0.4428518472754232, + "grad_norm": 1.2267380523250877, + "learning_rate": 1.844371702905317e-05, + "loss": 0.36141306161880493, + "step": 896 + }, + { + "epoch": 0.44334610156925736, + "grad_norm": 0.9926258795727512, + "learning_rate": 1.8439336752598027e-05, + "loss": 0.35286253690719604, + "step": 897 + }, + { + "epoch": 0.44384035586309156, + "grad_norm": 1.0509214985554662, + "learning_rate": 1.8434950842162256e-05, + "loss": 0.38967087864875793, + "step": 898 + }, + { + "epoch": 0.4443346101569257, + "grad_norm": 1.1041873655686079, + "learning_rate": 1.8430559300673824e-05, + "loss": 
0.4260423183441162, + "step": 899 + }, + { + "epoch": 0.4448288644507599, + "grad_norm": 1.0004221402171782, + "learning_rate": 1.8426162131064456e-05, + "loss": 0.35336780548095703, + "step": 900 + }, + { + "epoch": 0.44532311874459407, + "grad_norm": 1.0124996907215051, + "learning_rate": 1.842175933626965e-05, + "loss": 0.32953035831451416, + "step": 901 + }, + { + "epoch": 0.4458173730384283, + "grad_norm": 1.1481125848953921, + "learning_rate": 1.841735091922864e-05, + "loss": 0.3495085537433624, + "step": 902 + }, + { + "epoch": 0.44631162733226243, + "grad_norm": 1.0556558347257945, + "learning_rate": 1.8412936882884426e-05, + "loss": 0.3774382174015045, + "step": 903 + }, + { + "epoch": 0.44680588162609663, + "grad_norm": 1.1488659780400408, + "learning_rate": 1.8408517230183756e-05, + "loss": 0.397183358669281, + "step": 904 + }, + { + "epoch": 0.4473001359199308, + "grad_norm": 1.1226988100601583, + "learning_rate": 1.840409196407713e-05, + "loss": 0.4004632234573364, + "step": 905 + }, + { + "epoch": 0.447794390213765, + "grad_norm": 0.9888048683742604, + "learning_rate": 1.8399661087518784e-05, + "loss": 0.3464478850364685, + "step": 906 + }, + { + "epoch": 0.44828864450759914, + "grad_norm": 1.0618254470638813, + "learning_rate": 1.839522460346671e-05, + "loss": 0.38161879777908325, + "step": 907 + }, + { + "epoch": 0.44878289880143335, + "grad_norm": 1.0021571541379897, + "learning_rate": 1.839078251488265e-05, + "loss": 0.3307412266731262, + "step": 908 + }, + { + "epoch": 0.4492771530952675, + "grad_norm": 1.0558486391083746, + "learning_rate": 1.838633482473207e-05, + "loss": 0.3238945007324219, + "step": 909 + }, + { + "epoch": 0.4497714073891017, + "grad_norm": 1.1763396472681338, + "learning_rate": 1.8381881535984186e-05, + "loss": 0.37863802909851074, + "step": 910 + }, + { + "epoch": 0.45026566168293586, + "grad_norm": 1.187536001798055, + "learning_rate": 1.8377422651611955e-05, + "loss": 0.35920199751853943, + "step": 911 + }, + { + "epoch": 
0.45075991597677006, + "grad_norm": 1.1108046485108733, + "learning_rate": 1.8372958174592054e-05, + "loss": 0.3913283050060272, + "step": 912 + }, + { + "epoch": 0.4512541702706042, + "grad_norm": 1.029447767687351, + "learning_rate": 1.8368488107904916e-05, + "loss": 0.32950836420059204, + "step": 913 + }, + { + "epoch": 0.4517484245644384, + "grad_norm": 0.9275296283957708, + "learning_rate": 1.8364012454534687e-05, + "loss": 0.30557066202163696, + "step": 914 + }, + { + "epoch": 0.4522426788582726, + "grad_norm": 1.0685283966213752, + "learning_rate": 1.835953121746925e-05, + "loss": 0.3280435800552368, + "step": 915 + }, + { + "epoch": 0.4527369331521068, + "grad_norm": 1.0053118292301932, + "learning_rate": 1.835504439970021e-05, + "loss": 0.323611319065094, + "step": 916 + }, + { + "epoch": 0.45323118744594093, + "grad_norm": 1.086332749113099, + "learning_rate": 1.835055200422292e-05, + "loss": 0.3794775605201721, + "step": 917 + }, + { + "epoch": 0.45372544173977514, + "grad_norm": 1.1746257984153148, + "learning_rate": 1.8346054034036418e-05, + "loss": 0.3437816798686981, + "step": 918 + }, + { + "epoch": 0.4542196960336093, + "grad_norm": 1.175593282348777, + "learning_rate": 1.8341550492143497e-05, + "loss": 0.40312957763671875, + "step": 919 + }, + { + "epoch": 0.4547139503274435, + "grad_norm": 1.0344840643948632, + "learning_rate": 1.833704138155065e-05, + "loss": 0.33988016843795776, + "step": 920 + }, + { + "epoch": 0.45520820462127765, + "grad_norm": 1.099362227926189, + "learning_rate": 1.83325267052681e-05, + "loss": 0.30893969535827637, + "step": 921 + }, + { + "epoch": 0.45570245891511185, + "grad_norm": 1.1279932203915406, + "learning_rate": 1.832800646630978e-05, + "loss": 0.3351095914840698, + "step": 922 + }, + { + "epoch": 0.456196713208946, + "grad_norm": 1.0211776718159757, + "learning_rate": 1.8323480667693335e-05, + "loss": 0.3235122561454773, + "step": 923 + }, + { + "epoch": 0.45669096750278015, + "grad_norm": 1.0274671423740642, + 
"learning_rate": 1.8318949312440126e-05, + "loss": 0.3482256531715393, + "step": 924 + }, + { + "epoch": 0.45718522179661436, + "grad_norm": 1.0223238909560575, + "learning_rate": 1.831441240357522e-05, + "loss": 0.3577580451965332, + "step": 925 + }, + { + "epoch": 0.4576794760904485, + "grad_norm": 1.100617534966992, + "learning_rate": 1.8309869944127386e-05, + "loss": 0.34081172943115234, + "step": 926 + }, + { + "epoch": 0.4581737303842827, + "grad_norm": 1.1911908757683491, + "learning_rate": 1.8305321937129118e-05, + "loss": 0.4041389524936676, + "step": 927 + }, + { + "epoch": 0.45866798467811687, + "grad_norm": 0.9300326755373893, + "learning_rate": 1.830076838561659e-05, + "loss": 0.3014240562915802, + "step": 928 + }, + { + "epoch": 0.4591622389719511, + "grad_norm": 1.0061666296037273, + "learning_rate": 1.829620929262969e-05, + "loss": 0.3105698823928833, + "step": 929 + }, + { + "epoch": 0.4596564932657852, + "grad_norm": 1.035696211609358, + "learning_rate": 1.8291644661212008e-05, + "loss": 0.36114832758903503, + "step": 930 + }, + { + "epoch": 0.46015074755961943, + "grad_norm": 1.0621844186259055, + "learning_rate": 1.828707449441082e-05, + "loss": 0.33738240599632263, + "step": 931 + }, + { + "epoch": 0.4606450018534536, + "grad_norm": 1.0507412286541111, + "learning_rate": 1.8282498795277108e-05, + "loss": 0.3455100655555725, + "step": 932 + }, + { + "epoch": 0.4611392561472878, + "grad_norm": 1.0635377650103532, + "learning_rate": 1.8277917566865544e-05, + "loss": 0.3622395992279053, + "step": 933 + }, + { + "epoch": 0.46163351044112194, + "grad_norm": 1.1698746861585616, + "learning_rate": 1.8273330812234488e-05, + "loss": 0.36942192912101746, + "step": 934 + }, + { + "epoch": 0.46212776473495615, + "grad_norm": 1.1083328377879573, + "learning_rate": 1.8268738534445996e-05, + "loss": 0.33603039383888245, + "step": 935 + }, + { + "epoch": 0.4626220190287903, + "grad_norm": 1.0473328437100615, + "learning_rate": 1.82641407365658e-05, + "loss": 
0.34806567430496216, + "step": 936 + }, + { + "epoch": 0.4631162733226245, + "grad_norm": 1.0559884618945852, + "learning_rate": 1.8259537421663333e-05, + "loss": 0.35512328147888184, + "step": 937 + }, + { + "epoch": 0.46361052761645866, + "grad_norm": 1.0108795008514326, + "learning_rate": 1.8254928592811695e-05, + "loss": 0.33349719643592834, + "step": 938 + }, + { + "epoch": 0.46410478191029286, + "grad_norm": 1.2122442261111321, + "learning_rate": 1.8250314253087677e-05, + "loss": 0.3510274887084961, + "step": 939 + }, + { + "epoch": 0.464599036204127, + "grad_norm": 1.2184941603930532, + "learning_rate": 1.824569440557175e-05, + "loss": 0.35831883549690247, + "step": 940 + }, + { + "epoch": 0.4650932904979612, + "grad_norm": 1.1635496425287044, + "learning_rate": 1.824106905334805e-05, + "loss": 0.353208065032959, + "step": 941 + }, + { + "epoch": 0.46558754479179537, + "grad_norm": 1.1400926219916139, + "learning_rate": 1.8236438199504402e-05, + "loss": 0.3335849642753601, + "step": 942 + }, + { + "epoch": 0.4660817990856296, + "grad_norm": 1.0623049779098108, + "learning_rate": 1.8231801847132294e-05, + "loss": 0.346247136592865, + "step": 943 + }, + { + "epoch": 0.46657605337946373, + "grad_norm": 1.0719060242361118, + "learning_rate": 1.8227159999326895e-05, + "loss": 0.35125380754470825, + "step": 944 + }, + { + "epoch": 0.46707030767329794, + "grad_norm": 1.026675887024196, + "learning_rate": 1.822251265918703e-05, + "loss": 0.34262675046920776, + "step": 945 + }, + { + "epoch": 0.4675645619671321, + "grad_norm": 1.0951735908349534, + "learning_rate": 1.82178598298152e-05, + "loss": 0.3437168598175049, + "step": 946 + }, + { + "epoch": 0.4680588162609663, + "grad_norm": 1.2204880290084008, + "learning_rate": 1.8213201514317565e-05, + "loss": 0.35729774832725525, + "step": 947 + }, + { + "epoch": 0.46855307055480044, + "grad_norm": 1.1062871199303559, + "learning_rate": 1.8208537715803954e-05, + "loss": 0.36507898569107056, + "step": 948 + }, + { + 
"epoch": 0.46904732484863465, + "grad_norm": 1.0875432400928187, + "learning_rate": 1.8203868437387847e-05, + "loss": 0.363017737865448, + "step": 949 + }, + { + "epoch": 0.4695415791424688, + "grad_norm": 1.0718622311605446, + "learning_rate": 1.8199193682186388e-05, + "loss": 0.3645821511745453, + "step": 950 + }, + { + "epoch": 0.47003583343630295, + "grad_norm": 1.2195854283374437, + "learning_rate": 1.8194513453320387e-05, + "loss": 0.3054324686527252, + "step": 951 + }, + { + "epoch": 0.47053008773013716, + "grad_norm": 1.0538248118306075, + "learning_rate": 1.8189827753914282e-05, + "loss": 0.35003694891929626, + "step": 952 + }, + { + "epoch": 0.4710243420239713, + "grad_norm": 1.1789267282791076, + "learning_rate": 1.8185136587096193e-05, + "loss": 0.37834814190864563, + "step": 953 + }, + { + "epoch": 0.4715185963178055, + "grad_norm": 1.0741971770420784, + "learning_rate": 1.8180439955997867e-05, + "loss": 0.3369285464286804, + "step": 954 + }, + { + "epoch": 0.47201285061163967, + "grad_norm": 1.010532535770725, + "learning_rate": 1.8175737863754706e-05, + "loss": 0.3612895905971527, + "step": 955 + }, + { + "epoch": 0.4725071049054739, + "grad_norm": 1.057430538694607, + "learning_rate": 1.817103031350577e-05, + "loss": 0.34393271803855896, + "step": 956 + }, + { + "epoch": 0.473001359199308, + "grad_norm": 1.0983705860238564, + "learning_rate": 1.8166317308393745e-05, + "loss": 0.3824620544910431, + "step": 957 + }, + { + "epoch": 0.47349561349314223, + "grad_norm": 1.0093831974265368, + "learning_rate": 1.816159885156497e-05, + "loss": 0.3092145621776581, + "step": 958 + }, + { + "epoch": 0.4739898677869764, + "grad_norm": 0.9971938324913802, + "learning_rate": 1.8156874946169414e-05, + "loss": 0.3328183889389038, + "step": 959 + }, + { + "epoch": 0.4744841220808106, + "grad_norm": 1.1071894513842127, + "learning_rate": 1.815214559536069e-05, + "loss": 0.3715244233608246, + "step": 960 + }, + { + "epoch": 0.47497837637464474, + "grad_norm": 
0.9615506144211561, + "learning_rate": 1.814741080229605e-05, + "loss": 0.31065690517425537, + "step": 961 + }, + { + "epoch": 0.47547263066847895, + "grad_norm": 1.0443475280559777, + "learning_rate": 1.814267057013637e-05, + "loss": 0.3632475733757019, + "step": 962 + }, + { + "epoch": 0.4759668849623131, + "grad_norm": 1.0447314581931118, + "learning_rate": 1.813792490204616e-05, + "loss": 0.3367992043495178, + "step": 963 + }, + { + "epoch": 0.4764611392561473, + "grad_norm": 3.0902704784337263, + "learning_rate": 1.813317380119356e-05, + "loss": 0.37678295373916626, + "step": 964 + }, + { + "epoch": 0.47695539354998145, + "grad_norm": 1.092515860835368, + "learning_rate": 1.8128417270750342e-05, + "loss": 0.31454166769981384, + "step": 965 + }, + { + "epoch": 0.47744964784381566, + "grad_norm": 1.1351912635055343, + "learning_rate": 1.81236553138919e-05, + "loss": 0.38495004177093506, + "step": 966 + }, + { + "epoch": 0.4779439021376498, + "grad_norm": 1.1935841314497264, + "learning_rate": 1.8118887933797237e-05, + "loss": 0.3867315948009491, + "step": 967 + }, + { + "epoch": 0.478438156431484, + "grad_norm": 1.0520609240642282, + "learning_rate": 1.8114115133648996e-05, + "loss": 0.3453156650066376, + "step": 968 + }, + { + "epoch": 0.47893241072531817, + "grad_norm": 1.0244115852831113, + "learning_rate": 1.8109336916633426e-05, + "loss": 0.34461456537246704, + "step": 969 + }, + { + "epoch": 0.4794266650191524, + "grad_norm": 1.0814329785787762, + "learning_rate": 1.8104553285940404e-05, + "loss": 0.36489856243133545, + "step": 970 + }, + { + "epoch": 0.4799209193129865, + "grad_norm": 1.0551232871498393, + "learning_rate": 1.80997642447634e-05, + "loss": 0.3596840500831604, + "step": 971 + }, + { + "epoch": 0.48041517360682073, + "grad_norm": 1.1473167291229827, + "learning_rate": 1.8094969796299527e-05, + "loss": 0.3856956362724304, + "step": 972 + }, + { + "epoch": 0.4809094279006549, + "grad_norm": 1.036679746340059, + "learning_rate": 
1.8090169943749477e-05, + "loss": 0.3235170245170593, + "step": 973 + }, + { + "epoch": 0.4814036821944891, + "grad_norm": 0.9980037023378185, + "learning_rate": 1.8085364690317564e-05, + "loss": 0.28033584356307983, + "step": 974 + }, + { + "epoch": 0.48189793648832324, + "grad_norm": 1.0350899218465197, + "learning_rate": 1.808055403921171e-05, + "loss": 0.3279935419559479, + "step": 975 + }, + { + "epoch": 0.4823921907821574, + "grad_norm": 1.1400322966724836, + "learning_rate": 1.8075737993643442e-05, + "loss": 0.36426058411598206, + "step": 976 + }, + { + "epoch": 0.4828864450759916, + "grad_norm": 1.062964412180167, + "learning_rate": 1.8070916556827876e-05, + "loss": 0.3720256984233856, + "step": 977 + }, + { + "epoch": 0.48338069936982575, + "grad_norm": 1.1101144076762623, + "learning_rate": 1.8066089731983735e-05, + "loss": 0.3299727439880371, + "step": 978 + }, + { + "epoch": 0.48387495366365996, + "grad_norm": 1.1080862284860111, + "learning_rate": 1.8061257522333338e-05, + "loss": 0.3425888419151306, + "step": 979 + }, + { + "epoch": 0.4843692079574941, + "grad_norm": 1.1899160965861721, + "learning_rate": 1.80564199311026e-05, + "loss": 0.34109392762184143, + "step": 980 + }, + { + "epoch": 0.4848634622513283, + "grad_norm": 1.017538963669655, + "learning_rate": 1.805157696152103e-05, + "loss": 0.29130926728248596, + "step": 981 + }, + { + "epoch": 0.48535771654516247, + "grad_norm": 1.1092378859222098, + "learning_rate": 1.8046728616821726e-05, + "loss": 0.36200815439224243, + "step": 982 + }, + { + "epoch": 0.48585197083899667, + "grad_norm": 1.3150178990962822, + "learning_rate": 1.8041874900241368e-05, + "loss": 0.3343828320503235, + "step": 983 + }, + { + "epoch": 0.4863462251328308, + "grad_norm": 0.9882024578614582, + "learning_rate": 1.803701581502023e-05, + "loss": 0.32942160964012146, + "step": 984 + }, + { + "epoch": 0.48684047942666503, + "grad_norm": 0.9909863431121513, + "learning_rate": 1.803215136440217e-05, + "loss": 
0.34390491247177124, + "step": 985 + }, + { + "epoch": 0.4873347337204992, + "grad_norm": 1.1118778887065912, + "learning_rate": 1.8027281551634622e-05, + "loss": 0.37723374366760254, + "step": 986 + }, + { + "epoch": 0.4878289880143334, + "grad_norm": 1.0469525424396737, + "learning_rate": 1.802240637996861e-05, + "loss": 0.3493693470954895, + "step": 987 + }, + { + "epoch": 0.48832324230816754, + "grad_norm": 1.131021341780466, + "learning_rate": 1.8017525852658723e-05, + "loss": 0.3564317524433136, + "step": 988 + }, + { + "epoch": 0.48881749660200174, + "grad_norm": 1.0435634175515676, + "learning_rate": 1.8012639972963136e-05, + "loss": 0.36572349071502686, + "step": 989 + }, + { + "epoch": 0.4893117508958359, + "grad_norm": 1.0078714155476896, + "learning_rate": 1.8007748744143586e-05, + "loss": 0.31457674503326416, + "step": 990 + }, + { + "epoch": 0.4898060051896701, + "grad_norm": 1.1126722971991523, + "learning_rate": 1.8002852169465393e-05, + "loss": 0.36191096901893616, + "step": 991 + }, + { + "epoch": 0.49030025948350425, + "grad_norm": 1.0321930748215848, + "learning_rate": 1.799795025219744e-05, + "loss": 0.33284491300582886, + "step": 992 + }, + { + "epoch": 0.49079451377733846, + "grad_norm": 1.0239281284644144, + "learning_rate": 1.7993042995612172e-05, + "loss": 0.3101437985897064, + "step": 993 + }, + { + "epoch": 0.4912887680711726, + "grad_norm": 1.033291904553078, + "learning_rate": 1.7988130402985608e-05, + "loss": 0.3196948170661926, + "step": 994 + }, + { + "epoch": 0.4917830223650068, + "grad_norm": 1.1489266069218314, + "learning_rate": 1.7983212477597325e-05, + "loss": 0.3757585883140564, + "step": 995 + }, + { + "epoch": 0.49227727665884097, + "grad_norm": 1.1725728838471274, + "learning_rate": 1.7978289222730454e-05, + "loss": 0.3949659466743469, + "step": 996 + }, + { + "epoch": 0.4927715309526752, + "grad_norm": 1.1279800728609437, + "learning_rate": 1.79733606416717e-05, + "loss": 0.3490184545516968, + "step": 997 + }, + { + 
"epoch": 0.4932657852465093, + "grad_norm": 1.2158784468170585, + "learning_rate": 1.7968426737711304e-05, + "loss": 0.32302743196487427, + "step": 998 + }, + { + "epoch": 0.49376003954034353, + "grad_norm": 1.1923748239050125, + "learning_rate": 1.7963487514143073e-05, + "loss": 0.4205089807510376, + "step": 999 + }, + { + "epoch": 0.4942542938341777, + "grad_norm": 1.0997609009048648, + "learning_rate": 1.7958542974264363e-05, + "loss": 0.30787885189056396, + "step": 1000 + }, + { + "epoch": 0.4947485481280119, + "grad_norm": 0.9527130505595168, + "learning_rate": 1.7953593121376075e-05, + "loss": 0.3174916207790375, + "step": 1001 + }, + { + "epoch": 0.49524280242184604, + "grad_norm": 0.9736659707101099, + "learning_rate": 1.7948637958782662e-05, + "loss": 0.330039381980896, + "step": 1002 + }, + { + "epoch": 0.4957370567156802, + "grad_norm": 1.0487288206783625, + "learning_rate": 1.794367748979212e-05, + "loss": 0.3362613320350647, + "step": 1003 + }, + { + "epoch": 0.4962313110095144, + "grad_norm": 1.065682818958373, + "learning_rate": 1.793871171771599e-05, + "loss": 0.3479865789413452, + "step": 1004 + }, + { + "epoch": 0.49672556530334855, + "grad_norm": 1.0920057715386207, + "learning_rate": 1.7933740645869345e-05, + "loss": 0.361303448677063, + "step": 1005 + }, + { + "epoch": 0.49721981959718276, + "grad_norm": 1.07605927747069, + "learning_rate": 1.79287642775708e-05, + "loss": 0.32340794801712036, + "step": 1006 + }, + { + "epoch": 0.4977140738910169, + "grad_norm": 1.086462795838887, + "learning_rate": 1.792378261614252e-05, + "loss": 0.3410148620605469, + "step": 1007 + }, + { + "epoch": 0.4982083281848511, + "grad_norm": 1.0450045575623719, + "learning_rate": 1.791879566491018e-05, + "loss": 0.3332127034664154, + "step": 1008 + }, + { + "epoch": 0.49870258247868526, + "grad_norm": 1.1673390171795246, + "learning_rate": 1.7913803427202998e-05, + "loss": 0.36532774567604065, + "step": 1009 + }, + { + "epoch": 0.49919683677251947, + "grad_norm": 
1.1838892890378474, + "learning_rate": 1.7908805906353725e-05, + "loss": 0.3721959888935089, + "step": 1010 + }, + { + "epoch": 0.4996910910663536, + "grad_norm": 0.990806411218012, + "learning_rate": 1.7903803105698627e-05, + "loss": 0.3406672477722168, + "step": 1011 + }, + { + "epoch": 0.5001853453601878, + "grad_norm": 1.0152890264941994, + "learning_rate": 1.789879502857751e-05, + "loss": 0.323926717042923, + "step": 1012 + }, + { + "epoch": 0.500679599654022, + "grad_norm": 1.082078334287421, + "learning_rate": 1.7893781678333694e-05, + "loss": 0.36245018243789673, + "step": 1013 + }, + { + "epoch": 0.5011738539478562, + "grad_norm": 1.1363612319173766, + "learning_rate": 1.7888763058314016e-05, + "loss": 0.36145877838134766, + "step": 1014 + }, + { + "epoch": 0.5016681082416904, + "grad_norm": 0.9479821815236287, + "learning_rate": 1.788373917186884e-05, + "loss": 0.31398001313209534, + "step": 1015 + }, + { + "epoch": 0.5021623625355245, + "grad_norm": 1.0634976007398544, + "learning_rate": 1.7878710022352033e-05, + "loss": 0.36732447147369385, + "step": 1016 + }, + { + "epoch": 0.5026566168293587, + "grad_norm": 1.0888289854290114, + "learning_rate": 1.787367561312099e-05, + "loss": 0.3336929678916931, + "step": 1017 + }, + { + "epoch": 0.5031508711231929, + "grad_norm": 1.081948070644993, + "learning_rate": 1.786863594753661e-05, + "loss": 0.33306068181991577, + "step": 1018 + }, + { + "epoch": 0.5036451254170271, + "grad_norm": 1.1710814753085148, + "learning_rate": 1.7863591028963297e-05, + "loss": 0.32577213644981384, + "step": 1019 + }, + { + "epoch": 0.5041393797108612, + "grad_norm": 1.0902819718302648, + "learning_rate": 1.7858540860768974e-05, + "loss": 0.33542972803115845, + "step": 1020 + }, + { + "epoch": 0.5046336340046954, + "grad_norm": 1.1116685663765398, + "learning_rate": 1.7853485446325055e-05, + "loss": 0.3075249195098877, + "step": 1021 + }, + { + "epoch": 0.5051278882985296, + "grad_norm": 1.135601263046101, + "learning_rate": 
1.7848424789006466e-05, + "loss": 0.3473510146141052, + "step": 1022 + }, + { + "epoch": 0.5056221425923638, + "grad_norm": 1.2152682076096186, + "learning_rate": 1.784335889219163e-05, + "loss": 0.3543929159641266, + "step": 1023 + }, + { + "epoch": 0.5061163968861979, + "grad_norm": 1.026549045591816, + "learning_rate": 1.783828775926246e-05, + "loss": 0.3198593556880951, + "step": 1024 + }, + { + "epoch": 0.5066106511800321, + "grad_norm": 1.07796975394457, + "learning_rate": 1.783321139360438e-05, + "loss": 0.34223973751068115, + "step": 1025 + }, + { + "epoch": 0.5071049054738663, + "grad_norm": 1.2487195797385122, + "learning_rate": 1.78281297986063e-05, + "loss": 0.3895387351512909, + "step": 1026 + }, + { + "epoch": 0.5075991597677005, + "grad_norm": 1.0333211037977794, + "learning_rate": 1.782304297766061e-05, + "loss": 0.35764580965042114, + "step": 1027 + }, + { + "epoch": 0.5080934140615346, + "grad_norm": 0.9679048017438919, + "learning_rate": 1.7817950934163213e-05, + "loss": 0.30859488248825073, + "step": 1028 + }, + { + "epoch": 0.5085876683553688, + "grad_norm": 1.0913185130679384, + "learning_rate": 1.7812853671513472e-05, + "loss": 0.3554389476776123, + "step": 1029 + }, + { + "epoch": 0.509081922649203, + "grad_norm": 1.0101463789736986, + "learning_rate": 1.7807751193114254e-05, + "loss": 0.3528766632080078, + "step": 1030 + }, + { + "epoch": 0.5095761769430371, + "grad_norm": 1.054067237260528, + "learning_rate": 1.78026435023719e-05, + "loss": 0.3645275831222534, + "step": 1031 + }, + { + "epoch": 0.5100704312368713, + "grad_norm": 1.338540047449502, + "learning_rate": 1.779753060269623e-05, + "loss": 0.3137075901031494, + "step": 1032 + }, + { + "epoch": 0.5105646855307056, + "grad_norm": 1.0928434325752037, + "learning_rate": 1.7792412497500538e-05, + "loss": 0.31993091106414795, + "step": 1033 + }, + { + "epoch": 0.5110589398245398, + "grad_norm": 1.032718640643118, + "learning_rate": 1.7787289190201606e-05, + "loss": 0.3514295220375061, + 
"step": 1034 + }, + { + "epoch": 0.5115531941183739, + "grad_norm": 0.9529992201270954, + "learning_rate": 1.7782160684219677e-05, + "loss": 0.3167670667171478, + "step": 1035 + }, + { + "epoch": 0.5120474484122081, + "grad_norm": 1.1056391999630892, + "learning_rate": 1.7777026982978473e-05, + "loss": 0.3298097252845764, + "step": 1036 + }, + { + "epoch": 0.5125417027060423, + "grad_norm": 1.008539858185866, + "learning_rate": 1.777188808990517e-05, + "loss": 0.3334948420524597, + "step": 1037 + }, + { + "epoch": 0.5130359569998765, + "grad_norm": 1.1451382861648118, + "learning_rate": 1.776674400843043e-05, + "loss": 0.3705115020275116, + "step": 1038 + }, + { + "epoch": 0.5135302112937106, + "grad_norm": 1.2062150323771585, + "learning_rate": 1.7761594741988356e-05, + "loss": 0.3586978614330292, + "step": 1039 + }, + { + "epoch": 0.5140244655875448, + "grad_norm": 0.9949081741462515, + "learning_rate": 1.7756440294016535e-05, + "loss": 0.3105466663837433, + "step": 1040 + }, + { + "epoch": 0.514518719881379, + "grad_norm": 1.240576049327348, + "learning_rate": 1.7751280667956002e-05, + "loss": 0.35213470458984375, + "step": 1041 + }, + { + "epoch": 0.5150129741752132, + "grad_norm": 1.1494264660428748, + "learning_rate": 1.7746115867251245e-05, + "loss": 0.3830525875091553, + "step": 1042 + }, + { + "epoch": 0.5155072284690473, + "grad_norm": 1.044917786849415, + "learning_rate": 1.7740945895350215e-05, + "loss": 0.34106165170669556, + "step": 1043 + }, + { + "epoch": 0.5160014827628815, + "grad_norm": 0.9456529066854209, + "learning_rate": 1.773577075570431e-05, + "loss": 0.33408549427986145, + "step": 1044 + }, + { + "epoch": 0.5164957370567157, + "grad_norm": 1.057634132461443, + "learning_rate": 1.7730590451768375e-05, + "loss": 0.32823115587234497, + "step": 1045 + }, + { + "epoch": 0.5169899913505499, + "grad_norm": 0.9870247990943719, + "learning_rate": 1.7725404987000716e-05, + "loss": 0.2866591811180115, + "step": 1046 + }, + { + "epoch": 
0.517484245644384, + "grad_norm": 1.0669638645996897, + "learning_rate": 1.772021436486307e-05, + "loss": 0.34053099155426025, + "step": 1047 + }, + { + "epoch": 0.5179784999382182, + "grad_norm": 1.0384310943814752, + "learning_rate": 1.771501858882062e-05, + "loss": 0.30379486083984375, + "step": 1048 + }, + { + "epoch": 0.5184727542320524, + "grad_norm": 1.299899967945095, + "learning_rate": 1.7709817662341998e-05, + "loss": 0.37569302320480347, + "step": 1049 + }, + { + "epoch": 0.5189670085258866, + "grad_norm": 1.0489606422309163, + "learning_rate": 1.770461158889926e-05, + "loss": 0.31770390272140503, + "step": 1050 + }, + { + "epoch": 0.5194612628197207, + "grad_norm": 1.1640089464310481, + "learning_rate": 1.769940037196791e-05, + "loss": 0.34175002574920654, + "step": 1051 + }, + { + "epoch": 0.5199555171135549, + "grad_norm": 1.0797819699416114, + "learning_rate": 1.769418401502689e-05, + "loss": 0.3634580671787262, + "step": 1052 + }, + { + "epoch": 0.5204497714073891, + "grad_norm": 1.1990448584577926, + "learning_rate": 1.7688962521558554e-05, + "loss": 0.3631044030189514, + "step": 1053 + }, + { + "epoch": 0.5209440257012233, + "grad_norm": 1.2482048374766477, + "learning_rate": 1.7683735895048698e-05, + "loss": 0.3402160704135895, + "step": 1054 + }, + { + "epoch": 0.5214382799950574, + "grad_norm": 1.2190765212037056, + "learning_rate": 1.7678504138986548e-05, + "loss": 0.3895665407180786, + "step": 1055 + }, + { + "epoch": 0.5219325342888916, + "grad_norm": 1.076846194861831, + "learning_rate": 1.767326725686475e-05, + "loss": 0.32207030057907104, + "step": 1056 + }, + { + "epoch": 0.5224267885827258, + "grad_norm": 1.10282378456951, + "learning_rate": 1.7668025252179363e-05, + "loss": 0.33095866441726685, + "step": 1057 + }, + { + "epoch": 0.5229210428765599, + "grad_norm": 1.1487800022178571, + "learning_rate": 1.7662778128429883e-05, + "loss": 0.33239442110061646, + "step": 1058 + }, + { + "epoch": 0.5234152971703941, + "grad_norm": 
0.9873637767970463, + "learning_rate": 1.7657525889119212e-05, + "loss": 0.27432021498680115, + "step": 1059 + }, + { + "epoch": 0.5239095514642284, + "grad_norm": 1.0928994862368866, + "learning_rate": 1.7652268537753672e-05, + "loss": 0.3221333622932434, + "step": 1060 + }, + { + "epoch": 0.5244038057580626, + "grad_norm": 1.114838100134283, + "learning_rate": 1.764700607784299e-05, + "loss": 0.3126341700553894, + "step": 1061 + }, + { + "epoch": 0.5248980600518967, + "grad_norm": 1.0401864286303986, + "learning_rate": 1.7641738512900315e-05, + "loss": 0.33239883184432983, + "step": 1062 + }, + { + "epoch": 0.5253923143457309, + "grad_norm": 0.9509614150111031, + "learning_rate": 1.7636465846442197e-05, + "loss": 0.30075010657310486, + "step": 1063 + }, + { + "epoch": 0.5258865686395651, + "grad_norm": 1.0717488761603333, + "learning_rate": 1.763118808198859e-05, + "loss": 0.3577713370323181, + "step": 1064 + }, + { + "epoch": 0.5263808229333993, + "grad_norm": 1.0802706273753335, + "learning_rate": 1.7625905223062858e-05, + "loss": 0.3483964204788208, + "step": 1065 + }, + { + "epoch": 0.5268750772272334, + "grad_norm": 1.1651963376515642, + "learning_rate": 1.762061727319176e-05, + "loss": 0.3622454106807709, + "step": 1066 + }, + { + "epoch": 0.5273693315210676, + "grad_norm": 1.0440643033385941, + "learning_rate": 1.761532423590545e-05, + "loss": 0.35156917572021484, + "step": 1067 + }, + { + "epoch": 0.5278635858149018, + "grad_norm": 1.1589394381083906, + "learning_rate": 1.7610026114737498e-05, + "loss": 0.3413820266723633, + "step": 1068 + }, + { + "epoch": 0.528357840108736, + "grad_norm": 1.1280561588615983, + "learning_rate": 1.760472291322484e-05, + "loss": 0.3707934021949768, + "step": 1069 + }, + { + "epoch": 0.5288520944025701, + "grad_norm": 1.2170503232061094, + "learning_rate": 1.7599414634907828e-05, + "loss": 0.3472951054573059, + "step": 1070 + }, + { + "epoch": 0.5293463486964043, + "grad_norm": 1.1676650140216285, + "learning_rate": 
1.7594101283330184e-05, + "loss": 0.393882155418396, + "step": 1071 + }, + { + "epoch": 0.5298406029902385, + "grad_norm": 0.9683606994511744, + "learning_rate": 1.758878286203903e-05, + "loss": 0.3094913065433502, + "step": 1072 + }, + { + "epoch": 0.5303348572840727, + "grad_norm": 1.09347684867524, + "learning_rate": 1.758345937458487e-05, + "loss": 0.33904048800468445, + "step": 1073 + }, + { + "epoch": 0.5308291115779068, + "grad_norm": 1.0218184375103434, + "learning_rate": 1.7578130824521585e-05, + "loss": 0.3218901753425598, + "step": 1074 + }, + { + "epoch": 0.531323365871741, + "grad_norm": 0.95615697696865, + "learning_rate": 1.7572797215406442e-05, + "loss": 0.31584852933883667, + "step": 1075 + }, + { + "epoch": 0.5318176201655752, + "grad_norm": 0.9682503945021611, + "learning_rate": 1.756745855080008e-05, + "loss": 0.3449877202510834, + "step": 1076 + }, + { + "epoch": 0.5323118744594094, + "grad_norm": 1.084607183777355, + "learning_rate": 1.756211483426651e-05, + "loss": 0.3544886112213135, + "step": 1077 + }, + { + "epoch": 0.5328061287532435, + "grad_norm": 1.1680618553038933, + "learning_rate": 1.755676606937313e-05, + "loss": 0.34360697865486145, + "step": 1078 + }, + { + "epoch": 0.5333003830470777, + "grad_norm": 1.0514045755368502, + "learning_rate": 1.7551412259690695e-05, + "loss": 0.3214710056781769, + "step": 1079 + }, + { + "epoch": 0.5337946373409119, + "grad_norm": 0.9951048830690797, + "learning_rate": 1.754605340879333e-05, + "loss": 0.33841896057128906, + "step": 1080 + }, + { + "epoch": 0.534288891634746, + "grad_norm": 1.0536673015942455, + "learning_rate": 1.7540689520258532e-05, + "loss": 0.3134745657444, + "step": 1081 + }, + { + "epoch": 0.5347831459285802, + "grad_norm": 1.1773503335041235, + "learning_rate": 1.753532059766715e-05, + "loss": 0.3469204306602478, + "step": 1082 + }, + { + "epoch": 0.5352774002224144, + "grad_norm": 1.3802140663046265, + "learning_rate": 1.752994664460341e-05, + "loss": 0.39217621088027954, + 
"step": 1083 + }, + { + "epoch": 0.5357716545162486, + "grad_norm": 1.148906185686213, + "learning_rate": 1.7524567664654873e-05, + "loss": 0.34482622146606445, + "step": 1084 + }, + { + "epoch": 0.5362659088100827, + "grad_norm": 1.0089175831530743, + "learning_rate": 1.751918366141248e-05, + "loss": 0.308369517326355, + "step": 1085 + }, + { + "epoch": 0.5367601631039169, + "grad_norm": 1.1441511379564429, + "learning_rate": 1.751379463847051e-05, + "loss": 0.3396676480770111, + "step": 1086 + }, + { + "epoch": 0.5372544173977511, + "grad_norm": 1.0963418237920814, + "learning_rate": 1.7508400599426596e-05, + "loss": 0.3059370517730713, + "step": 1087 + }, + { + "epoch": 0.5377486716915854, + "grad_norm": 0.993693807257297, + "learning_rate": 1.7503001547881728e-05, + "loss": 0.31689077615737915, + "step": 1088 + }, + { + "epoch": 0.5382429259854195, + "grad_norm": 1.2996366258679217, + "learning_rate": 1.749759748744023e-05, + "loss": 0.37134337425231934, + "step": 1089 + }, + { + "epoch": 0.5387371802792537, + "grad_norm": 1.0586799377490923, + "learning_rate": 1.7492188421709775e-05, + "loss": 0.30404967069625854, + "step": 1090 + }, + { + "epoch": 0.5392314345730879, + "grad_norm": 1.1213884593031693, + "learning_rate": 1.7486774354301382e-05, + "loss": 0.34773269295692444, + "step": 1091 + }, + { + "epoch": 0.5397256888669221, + "grad_norm": 1.135256212480744, + "learning_rate": 1.7481355288829404e-05, + "loss": 0.34448760747909546, + "step": 1092 + }, + { + "epoch": 0.5402199431607562, + "grad_norm": 1.1111138178806874, + "learning_rate": 1.7475931228911526e-05, + "loss": 0.33557915687561035, + "step": 1093 + }, + { + "epoch": 0.5407141974545904, + "grad_norm": 1.1277612406863344, + "learning_rate": 1.7470502178168783e-05, + "loss": 0.3216322362422943, + "step": 1094 + }, + { + "epoch": 0.5412084517484246, + "grad_norm": 1.1416777218141756, + "learning_rate": 1.7465068140225524e-05, + "loss": 0.3175346255302429, + "step": 1095 + }, + { + "epoch": 
0.5417027060422588, + "grad_norm": 1.0466005920407673, + "learning_rate": 1.7459629118709435e-05, + "loss": 0.3150678277015686, + "step": 1096 + }, + { + "epoch": 0.5421969603360929, + "grad_norm": 1.1080261557130098, + "learning_rate": 1.7454185117251534e-05, + "loss": 0.3372325897216797, + "step": 1097 + }, + { + "epoch": 0.5426912146299271, + "grad_norm": 1.1607395393986693, + "learning_rate": 1.7448736139486156e-05, + "loss": 0.3460095524787903, + "step": 1098 + }, + { + "epoch": 0.5431854689237613, + "grad_norm": 1.0960477562857334, + "learning_rate": 1.7443282189050964e-05, + "loss": 0.3465900421142578, + "step": 1099 + }, + { + "epoch": 0.5436797232175955, + "grad_norm": 1.1271957826518202, + "learning_rate": 1.7437823269586925e-05, + "loss": 0.3707941174507141, + "step": 1100 + }, + { + "epoch": 0.5441739775114296, + "grad_norm": 1.0732325510644303, + "learning_rate": 1.7432359384738354e-05, + "loss": 0.3317713141441345, + "step": 1101 + }, + { + "epoch": 0.5446682318052638, + "grad_norm": 1.10075448775578, + "learning_rate": 1.742689053815285e-05, + "loss": 0.3391956090927124, + "step": 1102 + }, + { + "epoch": 0.545162486099098, + "grad_norm": 1.483156522178114, + "learning_rate": 1.742141673348134e-05, + "loss": 0.3838513195514679, + "step": 1103 + }, + { + "epoch": 0.5456567403929322, + "grad_norm": 1.2368776155357775, + "learning_rate": 1.7415937974378057e-05, + "loss": 0.4438849687576294, + "step": 1104 + }, + { + "epoch": 0.5461509946867663, + "grad_norm": 1.1360365035496875, + "learning_rate": 1.7410454264500542e-05, + "loss": 0.35329896211624146, + "step": 1105 + }, + { + "epoch": 0.5466452489806005, + "grad_norm": 0.9946710480219276, + "learning_rate": 1.7404965607509646e-05, + "loss": 0.3124481439590454, + "step": 1106 + }, + { + "epoch": 0.5471395032744347, + "grad_norm": 1.1827285369169889, + "learning_rate": 1.739947200706951e-05, + "loss": 0.3595995008945465, + "step": 1107 + }, + { + "epoch": 0.5476337575682688, + "grad_norm": 
1.0771205850736374, + "learning_rate": 1.7393973466847592e-05, + "loss": 0.35914891958236694, + "step": 1108 + }, + { + "epoch": 0.548128011862103, + "grad_norm": 1.0372075645038734, + "learning_rate": 1.7388469990514636e-05, + "loss": 0.34034737944602966, + "step": 1109 + }, + { + "epoch": 0.5486222661559372, + "grad_norm": 0.9639792162761298, + "learning_rate": 1.7382961581744677e-05, + "loss": 0.3033643066883087, + "step": 1110 + }, + { + "epoch": 0.5491165204497714, + "grad_norm": 1.0333536833038373, + "learning_rate": 1.737744824421506e-05, + "loss": 0.3239862322807312, + "step": 1111 + }, + { + "epoch": 0.5496107747436055, + "grad_norm": 1.0992782883377998, + "learning_rate": 1.7371929981606403e-05, + "loss": 0.36473411321640015, + "step": 1112 + }, + { + "epoch": 0.5501050290374397, + "grad_norm": 0.9808971248907185, + "learning_rate": 1.7366406797602625e-05, + "loss": 0.3129761517047882, + "step": 1113 + }, + { + "epoch": 0.550599283331274, + "grad_norm": 1.0031500416462213, + "learning_rate": 1.736087869589092e-05, + "loss": 0.30224812030792236, + "step": 1114 + }, + { + "epoch": 0.5510935376251082, + "grad_norm": 1.0008522519559948, + "learning_rate": 1.7355345680161774e-05, + "loss": 0.30045247077941895, + "step": 1115 + }, + { + "epoch": 0.5515877919189422, + "grad_norm": 1.1079372723945795, + "learning_rate": 1.7349807754108944e-05, + "loss": 0.3356926739215851, + "step": 1116 + }, + { + "epoch": 0.5520820462127765, + "grad_norm": 1.3704982317685879, + "learning_rate": 1.7344264921429475e-05, + "loss": 0.37749868631362915, + "step": 1117 + }, + { + "epoch": 0.5525763005066107, + "grad_norm": 1.0400914273370205, + "learning_rate": 1.733871718582368e-05, + "loss": 0.331012099981308, + "step": 1118 + }, + { + "epoch": 0.5530705548004449, + "grad_norm": 1.2654046748606915, + "learning_rate": 1.7333164550995153e-05, + "loss": 0.3557187020778656, + "step": 1119 + }, + { + "epoch": 0.553564809094279, + "grad_norm": 1.151377810019934, + "learning_rate": 
1.7327607020650744e-05, + "loss": 0.34102991223335266, + "step": 1120 + }, + { + "epoch": 0.5540590633881132, + "grad_norm": 1.0397881413898085, + "learning_rate": 1.7322044598500594e-05, + "loss": 0.328019917011261, + "step": 1121 + }, + { + "epoch": 0.5545533176819474, + "grad_norm": 1.0773058589187376, + "learning_rate": 1.7316477288258085e-05, + "loss": 0.33980751037597656, + "step": 1122 + }, + { + "epoch": 0.5550475719757816, + "grad_norm": 1.1823119583137516, + "learning_rate": 1.731090509363988e-05, + "loss": 0.3460109233856201, + "step": 1123 + }, + { + "epoch": 0.5555418262696157, + "grad_norm": 1.0727245460190564, + "learning_rate": 1.730532801836589e-05, + "loss": 0.3013002276420593, + "step": 1124 + }, + { + "epoch": 0.5560360805634499, + "grad_norm": 1.191952525403325, + "learning_rate": 1.72997460661593e-05, + "loss": 0.36195772886276245, + "step": 1125 + }, + { + "epoch": 0.5565303348572841, + "grad_norm": 1.1481571926267522, + "learning_rate": 1.7294159240746532e-05, + "loss": 0.3368675112724304, + "step": 1126 + }, + { + "epoch": 0.5570245891511183, + "grad_norm": 1.0950064938478345, + "learning_rate": 1.7288567545857283e-05, + "loss": 0.36618539690971375, + "step": 1127 + }, + { + "epoch": 0.5575188434449524, + "grad_norm": 1.0773610015009678, + "learning_rate": 1.7282970985224477e-05, + "loss": 0.3230215311050415, + "step": 1128 + }, + { + "epoch": 0.5580130977387866, + "grad_norm": 1.1539889538468413, + "learning_rate": 1.72773695625843e-05, + "loss": 0.38779711723327637, + "step": 1129 + }, + { + "epoch": 0.5585073520326208, + "grad_norm": 1.0853438524765577, + "learning_rate": 1.7271763281676187e-05, + "loss": 0.33910998702049255, + "step": 1130 + }, + { + "epoch": 0.559001606326455, + "grad_norm": 1.1265909455665821, + "learning_rate": 1.726615214624281e-05, + "loss": 0.3526651859283447, + "step": 1131 + }, + { + "epoch": 0.5594958606202891, + "grad_norm": 1.0899084132349224, + "learning_rate": 1.7260536160030077e-05, + "loss": 
0.33794116973876953, + "step": 1132 + }, + { + "epoch": 0.5599901149141233, + "grad_norm": 1.2383181058563666, + "learning_rate": 1.7254915326787145e-05, + "loss": 0.3294123411178589, + "step": 1133 + }, + { + "epoch": 0.5604843692079575, + "grad_norm": 1.0381296685245769, + "learning_rate": 1.7249289650266402e-05, + "loss": 0.31193166971206665, + "step": 1134 + }, + { + "epoch": 0.5609786235017916, + "grad_norm": 1.0273514183990056, + "learning_rate": 1.7243659134223467e-05, + "loss": 0.298290491104126, + "step": 1135 + }, + { + "epoch": 0.5614728777956258, + "grad_norm": 1.0372406743131939, + "learning_rate": 1.7238023782417194e-05, + "loss": 0.3157176971435547, + "step": 1136 + }, + { + "epoch": 0.56196713208946, + "grad_norm": 0.9703670449018593, + "learning_rate": 1.7232383598609664e-05, + "loss": 0.3152535855770111, + "step": 1137 + }, + { + "epoch": 0.5624613863832942, + "grad_norm": 1.1457741905911056, + "learning_rate": 1.722673858656618e-05, + "loss": 0.35004952549934387, + "step": 1138 + }, + { + "epoch": 0.5629556406771283, + "grad_norm": 1.2128755723830003, + "learning_rate": 1.722108875005527e-05, + "loss": 0.3531174957752228, + "step": 1139 + }, + { + "epoch": 0.5634498949709625, + "grad_norm": 0.9896343114056704, + "learning_rate": 1.7215434092848693e-05, + "loss": 0.32532358169555664, + "step": 1140 + }, + { + "epoch": 0.5639441492647967, + "grad_norm": 1.086973420033045, + "learning_rate": 1.7209774618721408e-05, + "loss": 0.3252495229244232, + "step": 1141 + }, + { + "epoch": 0.564438403558631, + "grad_norm": 1.1232225314649664, + "learning_rate": 1.7204110331451603e-05, + "loss": 0.35428208112716675, + "step": 1142 + }, + { + "epoch": 0.564932657852465, + "grad_norm": 1.165276028587328, + "learning_rate": 1.7198441234820674e-05, + "loss": 0.37419646978378296, + "step": 1143 + }, + { + "epoch": 0.5654269121462993, + "grad_norm": 1.1206339776354848, + "learning_rate": 1.7192767332613235e-05, + "loss": 0.3342249095439911, + "step": 1144 + }, + { + 
"epoch": 0.5659211664401335, + "grad_norm": 1.0700889667237288, + "learning_rate": 1.7187088628617093e-05, + "loss": 0.36827898025512695, + "step": 1145 + }, + { + "epoch": 0.5664154207339677, + "grad_norm": 1.1884715403984119, + "learning_rate": 1.7181405126623275e-05, + "loss": 0.3560858964920044, + "step": 1146 + }, + { + "epoch": 0.5669096750278018, + "grad_norm": 1.0578073497156413, + "learning_rate": 1.7175716830426005e-05, + "loss": 0.35333797335624695, + "step": 1147 + }, + { + "epoch": 0.567403929321636, + "grad_norm": 1.0504095801617317, + "learning_rate": 1.71700237438227e-05, + "loss": 0.31053799390792847, + "step": 1148 + }, + { + "epoch": 0.5678981836154702, + "grad_norm": 1.1443484208273471, + "learning_rate": 1.7164325870613998e-05, + "loss": 0.37123826146125793, + "step": 1149 + }, + { + "epoch": 0.5683924379093044, + "grad_norm": 1.069054169156011, + "learning_rate": 1.715862321460371e-05, + "loss": 0.33981990814208984, + "step": 1150 + }, + { + "epoch": 0.5688866922031385, + "grad_norm": 1.1295222791710222, + "learning_rate": 1.7152915779598846e-05, + "loss": 0.34938257932662964, + "step": 1151 + }, + { + "epoch": 0.5693809464969727, + "grad_norm": 1.10704413276648, + "learning_rate": 1.714720356940961e-05, + "loss": 0.3069387376308441, + "step": 1152 + }, + { + "epoch": 0.5698752007908069, + "grad_norm": 1.1206304490989205, + "learning_rate": 1.7141486587849397e-05, + "loss": 0.34879156947135925, + "step": 1153 + }, + { + "epoch": 0.5703694550846411, + "grad_norm": 1.140159647567344, + "learning_rate": 1.7135764838734773e-05, + "loss": 0.3624545931816101, + "step": 1154 + }, + { + "epoch": 0.5708637093784752, + "grad_norm": 1.0671159168894162, + "learning_rate": 1.7130038325885502e-05, + "loss": 0.3548320531845093, + "step": 1155 + }, + { + "epoch": 0.5713579636723094, + "grad_norm": 1.0469806768045702, + "learning_rate": 1.7124307053124518e-05, + "loss": 0.3004404902458191, + "step": 1156 + }, + { + "epoch": 0.5718522179661436, + "grad_norm": 
1.1058227077648823, + "learning_rate": 1.7118571024277943e-05, + "loss": 0.31545472145080566, + "step": 1157 + }, + { + "epoch": 0.5723464722599778, + "grad_norm": 1.100412587450837, + "learning_rate": 1.711283024317506e-05, + "loss": 0.3116477429866791, + "step": 1158 + }, + { + "epoch": 0.5728407265538119, + "grad_norm": 1.1169526030822408, + "learning_rate": 1.710708471364834e-05, + "loss": 0.3472268581390381, + "step": 1159 + }, + { + "epoch": 0.5733349808476461, + "grad_norm": 1.1641407854241053, + "learning_rate": 1.7101334439533414e-05, + "loss": 0.33334046602249146, + "step": 1160 + }, + { + "epoch": 0.5738292351414803, + "grad_norm": 1.1720238639752558, + "learning_rate": 1.7095579424669074e-05, + "loss": 0.3462664783000946, + "step": 1161 + }, + { + "epoch": 0.5743234894353144, + "grad_norm": 1.0854325044336006, + "learning_rate": 1.7089819672897304e-05, + "loss": 0.3241977393627167, + "step": 1162 + }, + { + "epoch": 0.5748177437291486, + "grad_norm": 1.2501733360326688, + "learning_rate": 1.7084055188063217e-05, + "loss": 0.3194134533405304, + "step": 1163 + }, + { + "epoch": 0.5753119980229828, + "grad_norm": 1.1336053472715226, + "learning_rate": 1.7078285974015103e-05, + "loss": 0.3644179701805115, + "step": 1164 + }, + { + "epoch": 0.575806252316817, + "grad_norm": 1.1434067682408584, + "learning_rate": 1.7072512034604412e-05, + "loss": 0.36653730273246765, + "step": 1165 + }, + { + "epoch": 0.5763005066106511, + "grad_norm": 1.1221051792069954, + "learning_rate": 1.706673337368574e-05, + "loss": 0.3435714840888977, + "step": 1166 + }, + { + "epoch": 0.5767947609044853, + "grad_norm": 1.0603782757024258, + "learning_rate": 1.706094999511684e-05, + "loss": 0.36935871839523315, + "step": 1167 + }, + { + "epoch": 0.5772890151983195, + "grad_norm": 0.9845968090919184, + "learning_rate": 1.7055161902758607e-05, + "loss": 0.29493796825408936, + "step": 1168 + }, + { + "epoch": 0.5777832694921538, + "grad_norm": 1.0115254154804856, + "learning_rate": 
1.70493691004751e-05, + "loss": 0.32378828525543213, + "step": 1169 + }, + { + "epoch": 0.5782775237859878, + "grad_norm": 1.1123861652198228, + "learning_rate": 1.70435715921335e-05, + "loss": 0.3587600588798523, + "step": 1170 + }, + { + "epoch": 0.578771778079822, + "grad_norm": 1.1091481408248292, + "learning_rate": 1.703776938160415e-05, + "loss": 0.31885826587677, + "step": 1171 + }, + { + "epoch": 0.5792660323736563, + "grad_norm": 1.0414979222224348, + "learning_rate": 1.7031962472760514e-05, + "loss": 0.2950041890144348, + "step": 1172 + }, + { + "epoch": 0.5797602866674905, + "grad_norm": 1.121100234384589, + "learning_rate": 1.7026150869479208e-05, + "loss": 0.36190298199653625, + "step": 1173 + }, + { + "epoch": 0.5802545409613246, + "grad_norm": 1.067632760047313, + "learning_rate": 1.7020334575639972e-05, + "loss": 0.3402514159679413, + "step": 1174 + }, + { + "epoch": 0.5807487952551588, + "grad_norm": 0.9679286148168113, + "learning_rate": 1.7014513595125684e-05, + "loss": 0.3131282925605774, + "step": 1175 + }, + { + "epoch": 0.581243049548993, + "grad_norm": 1.056786860676952, + "learning_rate": 1.7008687931822344e-05, + "loss": 0.29499226808547974, + "step": 1176 + }, + { + "epoch": 0.5817373038428272, + "grad_norm": 1.0712930292635054, + "learning_rate": 1.700285758961908e-05, + "loss": 0.36821871995925903, + "step": 1177 + }, + { + "epoch": 0.5822315581366613, + "grad_norm": 1.2780126948070993, + "learning_rate": 1.6997022572408152e-05, + "loss": 0.31486836075782776, + "step": 1178 + }, + { + "epoch": 0.5827258124304955, + "grad_norm": 1.0778384840117066, + "learning_rate": 1.6991182884084928e-05, + "loss": 0.3176078498363495, + "step": 1179 + }, + { + "epoch": 0.5832200667243297, + "grad_norm": 1.294300282858588, + "learning_rate": 1.69853385285479e-05, + "loss": 0.4130980968475342, + "step": 1180 + }, + { + "epoch": 0.5837143210181639, + "grad_norm": 1.103648457674251, + "learning_rate": 1.697948950969868e-05, + "loss": 0.3164641857147217, + 
"step": 1181 + }, + { + "epoch": 0.584208575311998, + "grad_norm": 1.1707357674613739, + "learning_rate": 1.697363583144199e-05, + "loss": 0.36420726776123047, + "step": 1182 + }, + { + "epoch": 0.5847028296058322, + "grad_norm": 1.1827091905189109, + "learning_rate": 1.696777749768566e-05, + "loss": 0.3279833197593689, + "step": 1183 + }, + { + "epoch": 0.5851970838996664, + "grad_norm": 1.2462082843052198, + "learning_rate": 1.696191451234063e-05, + "loss": 0.311473548412323, + "step": 1184 + }, + { + "epoch": 0.5856913381935006, + "grad_norm": 1.0514702517271486, + "learning_rate": 1.6956046879320943e-05, + "loss": 0.32284629344940186, + "step": 1185 + }, + { + "epoch": 0.5861855924873347, + "grad_norm": 1.081683685343838, + "learning_rate": 1.6950174602543753e-05, + "loss": 0.3318635821342468, + "step": 1186 + }, + { + "epoch": 0.5866798467811689, + "grad_norm": 1.10655975155716, + "learning_rate": 1.6944297685929298e-05, + "loss": 0.3268307149410248, + "step": 1187 + }, + { + "epoch": 0.5871741010750031, + "grad_norm": 1.1757413336808826, + "learning_rate": 1.6938416133400934e-05, + "loss": 0.31885889172554016, + "step": 1188 + }, + { + "epoch": 0.5876683553688372, + "grad_norm": 1.044019985672413, + "learning_rate": 1.69325299488851e-05, + "loss": 0.29273971915245056, + "step": 1189 + }, + { + "epoch": 0.5881626096626714, + "grad_norm": 1.2128861059808687, + "learning_rate": 1.692663913631132e-05, + "loss": 0.3585188388824463, + "step": 1190 + }, + { + "epoch": 0.5886568639565056, + "grad_norm": 1.152183266519285, + "learning_rate": 1.6920743699612226e-05, + "loss": 0.37145692110061646, + "step": 1191 + }, + { + "epoch": 0.5891511182503398, + "grad_norm": 1.1211663085079848, + "learning_rate": 1.691484364272352e-05, + "loss": 0.34805262088775635, + "step": 1192 + }, + { + "epoch": 0.5896453725441739, + "grad_norm": 1.1094913177494823, + "learning_rate": 1.6908938969584002e-05, + "loss": 0.3540152907371521, + "step": 1193 + }, + { + "epoch": 
0.5901396268380081, + "grad_norm": 1.1138288622940957, + "learning_rate": 1.6903029684135545e-05, + "loss": 0.35808512568473816, + "step": 1194 + }, + { + "epoch": 0.5906338811318423, + "grad_norm": 1.2028693910668573, + "learning_rate": 1.68971157903231e-05, + "loss": 0.2881169021129608, + "step": 1195 + }, + { + "epoch": 0.5911281354256765, + "grad_norm": 1.126509020875868, + "learning_rate": 1.6891197292094704e-05, + "loss": 0.33551955223083496, + "step": 1196 + }, + { + "epoch": 0.5916223897195106, + "grad_norm": 1.0141998416691063, + "learning_rate": 1.688527419340146e-05, + "loss": 0.30721622705459595, + "step": 1197 + }, + { + "epoch": 0.5921166440133449, + "grad_norm": 1.0876501850612135, + "learning_rate": 1.687934649819754e-05, + "loss": 0.3296341300010681, + "step": 1198 + }, + { + "epoch": 0.5926108983071791, + "grad_norm": 1.1194456964334092, + "learning_rate": 1.6873414210440194e-05, + "loss": 0.3511606454849243, + "step": 1199 + }, + { + "epoch": 0.5931051526010133, + "grad_norm": 1.0762712673108126, + "learning_rate": 1.6867477334089728e-05, + "loss": 0.34293919801712036, + "step": 1200 + }, + { + "epoch": 0.5935994068948474, + "grad_norm": 0.9942852659141888, + "learning_rate": 1.686153587310952e-05, + "loss": 0.3334580659866333, + "step": 1201 + }, + { + "epoch": 0.5940936611886816, + "grad_norm": 1.1354238373080972, + "learning_rate": 1.6855589831466e-05, + "loss": 0.3542851209640503, + "step": 1202 + }, + { + "epoch": 0.5945879154825158, + "grad_norm": 1.0952906678959344, + "learning_rate": 1.6849639213128667e-05, + "loss": 0.30951520800590515, + "step": 1203 + }, + { + "epoch": 0.59508216977635, + "grad_norm": 1.0716710567299268, + "learning_rate": 1.6843684022070062e-05, + "loss": 0.333478718996048, + "step": 1204 + }, + { + "epoch": 0.5955764240701841, + "grad_norm": 1.0944556204789582, + "learning_rate": 1.683772426226579e-05, + "loss": 0.33562588691711426, + "step": 1205 + }, + { + "epoch": 0.5960706783640183, + "grad_norm": 
0.9136596878493712, + "learning_rate": 1.6831759937694497e-05, + "loss": 0.2626678943634033, + "step": 1206 + }, + { + "epoch": 0.5965649326578525, + "grad_norm": 1.1138721974001247, + "learning_rate": 1.6825791052337884e-05, + "loss": 0.349543035030365, + "step": 1207 + }, + { + "epoch": 0.5970591869516867, + "grad_norm": 1.0760285856821303, + "learning_rate": 1.6819817610180696e-05, + "loss": 0.3229057788848877, + "step": 1208 + }, + { + "epoch": 0.5975534412455208, + "grad_norm": 1.0511960959262137, + "learning_rate": 1.681383961521071e-05, + "loss": 0.32023823261260986, + "step": 1209 + }, + { + "epoch": 0.598047695539355, + "grad_norm": 1.0122201188951288, + "learning_rate": 1.680785707141876e-05, + "loss": 0.31556791067123413, + "step": 1210 + }, + { + "epoch": 0.5985419498331892, + "grad_norm": 1.1858949236151264, + "learning_rate": 1.68018699827987e-05, + "loss": 0.33287158608436584, + "step": 1211 + }, + { + "epoch": 0.5990362041270234, + "grad_norm": 1.0276520854994282, + "learning_rate": 1.6795878353347427e-05, + "loss": 0.28690433502197266, + "step": 1212 + }, + { + "epoch": 0.5995304584208575, + "grad_norm": 1.1202382723881081, + "learning_rate": 1.6789882187064862e-05, + "loss": 0.3501484990119934, + "step": 1213 + }, + { + "epoch": 0.6000247127146917, + "grad_norm": 1.15016872261832, + "learning_rate": 1.678388148795397e-05, + "loss": 0.3645259439945221, + "step": 1214 + }, + { + "epoch": 0.6005189670085259, + "grad_norm": 1.0232559071014062, + "learning_rate": 1.6777876260020726e-05, + "loss": 0.3270183801651001, + "step": 1215 + }, + { + "epoch": 0.60101322130236, + "grad_norm": 1.0680433488207848, + "learning_rate": 1.6771866507274132e-05, + "loss": 0.31767967343330383, + "step": 1216 + }, + { + "epoch": 0.6015074755961942, + "grad_norm": 1.0642272352631703, + "learning_rate": 1.6765852233726216e-05, + "loss": 0.3170120120048523, + "step": 1217 + }, + { + "epoch": 0.6020017298900284, + "grad_norm": 1.0689193394735252, + "learning_rate": 
1.6759833443392022e-05, + "loss": 0.3270176351070404, + "step": 1218 + }, + { + "epoch": 0.6024959841838626, + "grad_norm": 1.0053062396233938, + "learning_rate": 1.6753810140289608e-05, + "loss": 0.3229079246520996, + "step": 1219 + }, + { + "epoch": 0.6029902384776967, + "grad_norm": 1.060220470914707, + "learning_rate": 1.6747782328440044e-05, + "loss": 0.3366449773311615, + "step": 1220 + }, + { + "epoch": 0.6034844927715309, + "grad_norm": 1.2656940979343048, + "learning_rate": 1.674175001186741e-05, + "loss": 0.4027010500431061, + "step": 1221 + }, + { + "epoch": 0.6039787470653651, + "grad_norm": 1.039989374871811, + "learning_rate": 1.6735713194598798e-05, + "loss": 0.31566083431243896, + "step": 1222 + }, + { + "epoch": 0.6044730013591993, + "grad_norm": 1.1667815915058346, + "learning_rate": 1.67296718806643e-05, + "loss": 0.3361780047416687, + "step": 1223 + }, + { + "epoch": 0.6049672556530334, + "grad_norm": 1.0628494144880791, + "learning_rate": 1.6723626074097007e-05, + "loss": 0.3197939693927765, + "step": 1224 + }, + { + "epoch": 0.6054615099468676, + "grad_norm": 1.078571350485402, + "learning_rate": 1.671757577893302e-05, + "loss": 0.32977360486984253, + "step": 1225 + }, + { + "epoch": 0.6059557642407019, + "grad_norm": 1.1192119082687915, + "learning_rate": 1.671152099921142e-05, + "loss": 0.3434401750564575, + "step": 1226 + }, + { + "epoch": 0.6064500185345361, + "grad_norm": 1.0664877094913836, + "learning_rate": 1.67054617389743e-05, + "loss": 0.33856305480003357, + "step": 1227 + }, + { + "epoch": 0.6069442728283702, + "grad_norm": 1.147959053573069, + "learning_rate": 1.669939800226673e-05, + "loss": 0.31594911217689514, + "step": 1228 + }, + { + "epoch": 0.6074385271222044, + "grad_norm": 1.105417739927691, + "learning_rate": 1.669332979313678e-05, + "loss": 0.32347679138183594, + "step": 1229 + }, + { + "epoch": 0.6079327814160386, + "grad_norm": 1.1057400329817928, + "learning_rate": 1.6687257115635492e-05, + "loss": 
0.32733607292175293, + "step": 1230 + }, + { + "epoch": 0.6084270357098728, + "grad_norm": 0.9869005136013326, + "learning_rate": 1.6681179973816908e-05, + "loss": 0.306827187538147, + "step": 1231 + }, + { + "epoch": 0.6089212900037069, + "grad_norm": 1.068802395839477, + "learning_rate": 1.667509837173803e-05, + "loss": 0.3515884280204773, + "step": 1232 + }, + { + "epoch": 0.6094155442975411, + "grad_norm": 1.0062662165973097, + "learning_rate": 1.6669012313458862e-05, + "loss": 0.28699082136154175, + "step": 1233 + }, + { + "epoch": 0.6099097985913753, + "grad_norm": 1.0697164166178312, + "learning_rate": 1.6662921803042356e-05, + "loss": 0.30737537145614624, + "step": 1234 + }, + { + "epoch": 0.6104040528852095, + "grad_norm": 1.0782793991023802, + "learning_rate": 1.665682684455446e-05, + "loss": 0.3193345069885254, + "step": 1235 + }, + { + "epoch": 0.6108983071790436, + "grad_norm": 1.1629258901733988, + "learning_rate": 1.6650727442064073e-05, + "loss": 0.3326336741447449, + "step": 1236 + }, + { + "epoch": 0.6113925614728778, + "grad_norm": 1.0950813589125916, + "learning_rate": 1.6644623599643076e-05, + "loss": 0.2967267632484436, + "step": 1237 + }, + { + "epoch": 0.611886815766712, + "grad_norm": 1.104366364956542, + "learning_rate": 1.66385153213663e-05, + "loss": 0.3163914084434509, + "step": 1238 + }, + { + "epoch": 0.6123810700605461, + "grad_norm": 1.1913476484695409, + "learning_rate": 1.663240261131155e-05, + "loss": 0.40281808376312256, + "step": 1239 + }, + { + "epoch": 0.6128753243543803, + "grad_norm": 1.1744917859448287, + "learning_rate": 1.6626285473559586e-05, + "loss": 0.33946287631988525, + "step": 1240 + }, + { + "epoch": 0.6133695786482145, + "grad_norm": 1.121011060895708, + "learning_rate": 1.6620163912194114e-05, + "loss": 0.3750913143157959, + "step": 1241 + }, + { + "epoch": 0.6138638329420487, + "grad_norm": 1.1601773319994575, + "learning_rate": 1.6614037931301804e-05, + "loss": 0.32449400424957275, + "step": 1242 + }, + { + 
"epoch": 0.6143580872358828, + "grad_norm": 1.146035054497973, + "learning_rate": 1.6607907534972277e-05, + "loss": 0.3484799861907959, + "step": 1243 + }, + { + "epoch": 0.614852341529717, + "grad_norm": 1.0478699674323781, + "learning_rate": 1.6601772727298095e-05, + "loss": 0.2991127669811249, + "step": 1244 + }, + { + "epoch": 0.6153465958235512, + "grad_norm": 1.0941316253076903, + "learning_rate": 1.6595633512374768e-05, + "loss": 0.339094340801239, + "step": 1245 + }, + { + "epoch": 0.6158408501173854, + "grad_norm": 1.0756027047064132, + "learning_rate": 1.6589489894300744e-05, + "loss": 0.3147842288017273, + "step": 1246 + }, + { + "epoch": 0.6163351044112195, + "grad_norm": 1.0944450465347566, + "learning_rate": 1.6583341877177427e-05, + "loss": 0.3036183714866638, + "step": 1247 + }, + { + "epoch": 0.6168293587050537, + "grad_norm": 1.0983853525092009, + "learning_rate": 1.657718946510913e-05, + "loss": 0.32657095789909363, + "step": 1248 + }, + { + "epoch": 0.6173236129988879, + "grad_norm": 1.0660730573251251, + "learning_rate": 1.6571032662203126e-05, + "loss": 0.3104664385318756, + "step": 1249 + }, + { + "epoch": 0.6178178672927221, + "grad_norm": 1.0675015064613533, + "learning_rate": 1.6564871472569604e-05, + "loss": 0.30392807722091675, + "step": 1250 + }, + { + "epoch": 0.6183121215865562, + "grad_norm": 1.080894190005694, + "learning_rate": 1.655870590032169e-05, + "loss": 0.3087356388568878, + "step": 1251 + }, + { + "epoch": 0.6188063758803904, + "grad_norm": 1.0633256442775108, + "learning_rate": 1.6552535949575427e-05, + "loss": 0.3220480978488922, + "step": 1252 + }, + { + "epoch": 0.6193006301742247, + "grad_norm": 1.0867949301055795, + "learning_rate": 1.654636162444979e-05, + "loss": 0.33925485610961914, + "step": 1253 + }, + { + "epoch": 0.6197948844680589, + "grad_norm": 1.0651223448844926, + "learning_rate": 1.6540182929066667e-05, + "loss": 0.3704617917537689, + "step": 1254 + }, + { + "epoch": 0.620289138761893, + "grad_norm": 
1.1158405395395257, + "learning_rate": 1.653399986755087e-05, + "loss": 0.33745670318603516, + "step": 1255 + }, + { + "epoch": 0.6207833930557272, + "grad_norm": 1.1397943957058634, + "learning_rate": 1.6527812444030118e-05, + "loss": 0.31651467084884644, + "step": 1256 + }, + { + "epoch": 0.6212776473495614, + "grad_norm": 1.141112365152985, + "learning_rate": 1.6521620662635053e-05, + "loss": 0.360455185174942, + "step": 1257 + }, + { + "epoch": 0.6217719016433956, + "grad_norm": 1.0000307812773819, + "learning_rate": 1.6515424527499214e-05, + "loss": 0.32819390296936035, + "step": 1258 + }, + { + "epoch": 0.6222661559372297, + "grad_norm": 1.229539015248975, + "learning_rate": 1.6509224042759053e-05, + "loss": 0.38759690523147583, + "step": 1259 + }, + { + "epoch": 0.6227604102310639, + "grad_norm": 1.127403937815861, + "learning_rate": 1.6503019212553932e-05, + "loss": 0.34250545501708984, + "step": 1260 + }, + { + "epoch": 0.6232546645248981, + "grad_norm": 1.0060644367410545, + "learning_rate": 1.6496810041026097e-05, + "loss": 0.3120163679122925, + "step": 1261 + }, + { + "epoch": 0.6237489188187323, + "grad_norm": 1.1050188267024101, + "learning_rate": 1.649059653232071e-05, + "loss": 0.35985836386680603, + "step": 1262 + }, + { + "epoch": 0.6242431731125664, + "grad_norm": 1.0877426950647728, + "learning_rate": 1.648437869058581e-05, + "loss": 0.3551288843154907, + "step": 1263 + }, + { + "epoch": 0.6247374274064006, + "grad_norm": 1.095568415742879, + "learning_rate": 1.6478156519972354e-05, + "loss": 0.33047816157341003, + "step": 1264 + }, + { + "epoch": 0.6252316817002348, + "grad_norm": 1.0643242802432207, + "learning_rate": 1.6471930024634164e-05, + "loss": 0.32909417152404785, + "step": 1265 + }, + { + "epoch": 0.6257259359940689, + "grad_norm": 1.07195158812182, + "learning_rate": 1.6465699208727964e-05, + "loss": 0.3726924657821655, + "step": 1266 + }, + { + "epoch": 0.6262201902879031, + "grad_norm": 1.1316893144153, + "learning_rate": 
1.6459464076413355e-05, + "loss": 0.3569204807281494, + "step": 1267 + }, + { + "epoch": 0.6267144445817373, + "grad_norm": 1.0125649890138406, + "learning_rate": 1.6453224631852825e-05, + "loss": 0.33798107504844666, + "step": 1268 + }, + { + "epoch": 0.6272086988755715, + "grad_norm": 1.1537944647220344, + "learning_rate": 1.644698087921173e-05, + "loss": 0.32891637086868286, + "step": 1269 + }, + { + "epoch": 0.6277029531694056, + "grad_norm": 1.1246833616649612, + "learning_rate": 1.644073282265832e-05, + "loss": 0.31512969732284546, + "step": 1270 + }, + { + "epoch": 0.6281972074632398, + "grad_norm": 1.1199823464164773, + "learning_rate": 1.643448046636371e-05, + "loss": 0.350041925907135, + "step": 1271 + }, + { + "epoch": 0.628691461757074, + "grad_norm": 1.0925989435954497, + "learning_rate": 1.642822381450187e-05, + "loss": 0.3248854875564575, + "step": 1272 + }, + { + "epoch": 0.6291857160509082, + "grad_norm": 1.0344569444697491, + "learning_rate": 1.6421962871249662e-05, + "loss": 0.3031661808490753, + "step": 1273 + }, + { + "epoch": 0.6296799703447423, + "grad_norm": 1.0843035546126185, + "learning_rate": 1.6415697640786802e-05, + "loss": 0.2903754711151123, + "step": 1274 + }, + { + "epoch": 0.6301742246385765, + "grad_norm": 1.0122518499053432, + "learning_rate": 1.6409428127295864e-05, + "loss": 0.300454318523407, + "step": 1275 + }, + { + "epoch": 0.6306684789324107, + "grad_norm": 1.0842968830814483, + "learning_rate": 1.6403154334962286e-05, + "loss": 0.3430244028568268, + "step": 1276 + }, + { + "epoch": 0.6311627332262449, + "grad_norm": 1.1383634793407482, + "learning_rate": 1.6396876267974367e-05, + "loss": 0.3728436827659607, + "step": 1277 + }, + { + "epoch": 0.631656987520079, + "grad_norm": 1.103371729978927, + "learning_rate": 1.639059393052325e-05, + "loss": 0.3021183907985687, + "step": 1278 + }, + { + "epoch": 0.6321512418139132, + "grad_norm": 1.0649900935701406, + "learning_rate": 1.6384307326802934e-05, + "loss": 
0.3313615918159485, + "step": 1279 + }, + { + "epoch": 0.6326454961077475, + "grad_norm": 1.0519110395000262, + "learning_rate": 1.637801646101027e-05, + "loss": 0.32833239436149597, + "step": 1280 + }, + { + "epoch": 0.6331397504015817, + "grad_norm": 1.1672616485147485, + "learning_rate": 1.6371721337344947e-05, + "loss": 0.3575769066810608, + "step": 1281 + }, + { + "epoch": 0.6336340046954158, + "grad_norm": 1.044512245658177, + "learning_rate": 1.6365421960009502e-05, + "loss": 0.33323729038238525, + "step": 1282 + }, + { + "epoch": 0.63412825898925, + "grad_norm": 1.150185694461945, + "learning_rate": 1.6359118333209307e-05, + "loss": 0.3522900938987732, + "step": 1283 + }, + { + "epoch": 0.6346225132830842, + "grad_norm": 1.2143932108960407, + "learning_rate": 1.635281046115257e-05, + "loss": 0.3350796699523926, + "step": 1284 + }, + { + "epoch": 0.6351167675769184, + "grad_norm": 1.2071815938700088, + "learning_rate": 1.6346498348050342e-05, + "loss": 0.350632905960083, + "step": 1285 + }, + { + "epoch": 0.6356110218707525, + "grad_norm": 1.0108749382306044, + "learning_rate": 1.6340181998116494e-05, + "loss": 0.2961253523826599, + "step": 1286 + }, + { + "epoch": 0.6361052761645867, + "grad_norm": 1.3686468141070485, + "learning_rate": 1.6333861415567736e-05, + "loss": 0.35736170411109924, + "step": 1287 + }, + { + "epoch": 0.6365995304584209, + "grad_norm": 1.1749750672779442, + "learning_rate": 1.63275366046236e-05, + "loss": 0.35654571652412415, + "step": 1288 + }, + { + "epoch": 0.6370937847522551, + "grad_norm": 1.0658003578898634, + "learning_rate": 1.6321207569506435e-05, + "loss": 0.30518224835395813, + "step": 1289 + }, + { + "epoch": 0.6375880390460892, + "grad_norm": 1.1007851387105425, + "learning_rate": 1.6314874314441413e-05, + "loss": 0.35099470615386963, + "step": 1290 + }, + { + "epoch": 0.6380822933399234, + "grad_norm": 1.0971286067217327, + "learning_rate": 1.6308536843656528e-05, + "loss": 0.3577536344528198, + "step": 1291 + }, + { + 
"epoch": 0.6385765476337576, + "grad_norm": 1.0395121014513669, + "learning_rate": 1.6302195161382586e-05, + "loss": 0.3141167163848877, + "step": 1292 + }, + { + "epoch": 0.6390708019275917, + "grad_norm": 0.981608659730199, + "learning_rate": 1.62958492718532e-05, + "loss": 0.2920055389404297, + "step": 1293 + }, + { + "epoch": 0.6395650562214259, + "grad_norm": 1.0875768517352407, + "learning_rate": 1.6289499179304797e-05, + "loss": 0.32826486229896545, + "step": 1294 + }, + { + "epoch": 0.6400593105152601, + "grad_norm": 1.0051851075633542, + "learning_rate": 1.628314488797661e-05, + "loss": 0.3080480992794037, + "step": 1295 + }, + { + "epoch": 0.6405535648090943, + "grad_norm": 1.006537470660458, + "learning_rate": 1.627678640211067e-05, + "loss": 0.304529070854187, + "step": 1296 + }, + { + "epoch": 0.6410478191029284, + "grad_norm": 1.1108978139615113, + "learning_rate": 1.627042372595181e-05, + "loss": 0.34653496742248535, + "step": 1297 + }, + { + "epoch": 0.6415420733967626, + "grad_norm": 0.9745027779333038, + "learning_rate": 1.6264056863747667e-05, + "loss": 0.2938673496246338, + "step": 1298 + }, + { + "epoch": 0.6420363276905968, + "grad_norm": 1.1585281714148792, + "learning_rate": 1.625768581974866e-05, + "loss": 0.32350343465805054, + "step": 1299 + }, + { + "epoch": 0.642530581984431, + "grad_norm": 1.0756982630474194, + "learning_rate": 1.6251310598208015e-05, + "loss": 0.3175384998321533, + "step": 1300 + }, + { + "epoch": 0.6430248362782651, + "grad_norm": 1.1335110071944674, + "learning_rate": 1.6244931203381734e-05, + "loss": 0.32667648792266846, + "step": 1301 + }, + { + "epoch": 0.6435190905720993, + "grad_norm": 0.9986052180267636, + "learning_rate": 1.623854763952861e-05, + "loss": 0.30110976099967957, + "step": 1302 + }, + { + "epoch": 0.6440133448659335, + "grad_norm": 1.2219754266907614, + "learning_rate": 1.6232159910910224e-05, + "loss": 0.3508617579936981, + "step": 1303 + }, + { + "epoch": 0.6445075991597677, + "grad_norm": 
1.1027211796126624, + "learning_rate": 1.622576802179092e-05, + "loss": 0.34416183829307556, + "step": 1304 + }, + { + "epoch": 0.6450018534536018, + "grad_norm": 1.1267200023483468, + "learning_rate": 1.6219371976437847e-05, + "loss": 0.3509306311607361, + "step": 1305 + }, + { + "epoch": 0.645496107747436, + "grad_norm": 1.1746524244290708, + "learning_rate": 1.6212971779120904e-05, + "loss": 0.36186683177948, + "step": 1306 + }, + { + "epoch": 0.6459903620412702, + "grad_norm": 1.128374133277422, + "learning_rate": 1.6206567434112776e-05, + "loss": 0.3123924732208252, + "step": 1307 + }, + { + "epoch": 0.6464846163351045, + "grad_norm": 1.2141772034453755, + "learning_rate": 1.6200158945688907e-05, + "loss": 0.3691411018371582, + "step": 1308 + }, + { + "epoch": 0.6469788706289386, + "grad_norm": 1.1011618758034853, + "learning_rate": 1.6193746318127516e-05, + "loss": 0.3136986792087555, + "step": 1309 + }, + { + "epoch": 0.6474731249227728, + "grad_norm": 1.0883839992045683, + "learning_rate": 1.6187329555709585e-05, + "loss": 0.30374211072921753, + "step": 1310 + }, + { + "epoch": 0.647967379216607, + "grad_norm": 1.207837369942263, + "learning_rate": 1.618090866271884e-05, + "loss": 0.3633323907852173, + "step": 1311 + }, + { + "epoch": 0.6484616335104412, + "grad_norm": 1.056749654034174, + "learning_rate": 1.6174483643441795e-05, + "loss": 0.31395208835601807, + "step": 1312 + }, + { + "epoch": 0.6489558878042753, + "grad_norm": 1.0312943002596973, + "learning_rate": 1.6168054502167687e-05, + "loss": 0.29258471727371216, + "step": 1313 + }, + { + "epoch": 0.6494501420981095, + "grad_norm": 1.052844702612926, + "learning_rate": 1.6161621243188528e-05, + "loss": 0.3086007833480835, + "step": 1314 + }, + { + "epoch": 0.6499443963919437, + "grad_norm": 1.1099907156572013, + "learning_rate": 1.6155183870799063e-05, + "loss": 0.3604614734649658, + "step": 1315 + }, + { + "epoch": 0.6504386506857779, + "grad_norm": 1.230657559418624, + "learning_rate": 
1.614874238929679e-05, + "loss": 0.3784678876399994, + "step": 1316 + }, + { + "epoch": 0.650932904979612, + "grad_norm": 0.9692609071600233, + "learning_rate": 1.6142296802981957e-05, + "loss": 0.29009610414505005, + "step": 1317 + }, + { + "epoch": 0.6514271592734462, + "grad_norm": 1.1385261282180998, + "learning_rate": 1.6135847116157542e-05, + "loss": 0.3667104244232178, + "step": 1318 + }, + { + "epoch": 0.6519214135672804, + "grad_norm": 1.0454111919656257, + "learning_rate": 1.6129393333129262e-05, + "loss": 0.3100985884666443, + "step": 1319 + }, + { + "epoch": 0.6524156678611145, + "grad_norm": 1.0967001531345488, + "learning_rate": 1.612293545820557e-05, + "loss": 0.34128522872924805, + "step": 1320 + }, + { + "epoch": 0.6529099221549487, + "grad_norm": 1.016572733864691, + "learning_rate": 1.611647349569765e-05, + "loss": 0.3017216920852661, + "step": 1321 + }, + { + "epoch": 0.6534041764487829, + "grad_norm": 1.0979244854260226, + "learning_rate": 1.611000744991942e-05, + "loss": 0.35060590505599976, + "step": 1322 + }, + { + "epoch": 0.6538984307426171, + "grad_norm": 1.180855026456707, + "learning_rate": 1.610353732518752e-05, + "loss": 0.3766549825668335, + "step": 1323 + }, + { + "epoch": 0.6543926850364512, + "grad_norm": 0.9954937284294141, + "learning_rate": 1.609706312582131e-05, + "loss": 0.2970678210258484, + "step": 1324 + }, + { + "epoch": 0.6548869393302854, + "grad_norm": 1.2407304893003468, + "learning_rate": 1.609058485614287e-05, + "loss": 0.3345789909362793, + "step": 1325 + }, + { + "epoch": 0.6553811936241196, + "grad_norm": 1.159801774337048, + "learning_rate": 1.608410252047701e-05, + "loss": 0.34838157892227173, + "step": 1326 + }, + { + "epoch": 0.6558754479179538, + "grad_norm": 1.052743453114199, + "learning_rate": 1.6077616123151232e-05, + "loss": 0.27454087138175964, + "step": 1327 + }, + { + "epoch": 0.6563697022117879, + "grad_norm": 1.1304513457691607, + "learning_rate": 1.607112566849577e-05, + "loss": 
0.3372647762298584, + "step": 1328 + }, + { + "epoch": 0.6568639565056221, + "grad_norm": 1.1678098502989476, + "learning_rate": 1.606463116084356e-05, + "loss": 0.34433993697166443, + "step": 1329 + }, + { + "epoch": 0.6573582107994563, + "grad_norm": 1.0760327464429003, + "learning_rate": 1.6058132604530242e-05, + "loss": 0.3267759382724762, + "step": 1330 + }, + { + "epoch": 0.6578524650932905, + "grad_norm": 1.044029067228307, + "learning_rate": 1.6051630003894155e-05, + "loss": 0.3022347390651703, + "step": 1331 + }, + { + "epoch": 0.6583467193871246, + "grad_norm": 1.0701124312590375, + "learning_rate": 1.604512336327634e-05, + "loss": 0.32478266954421997, + "step": 1332 + }, + { + "epoch": 0.6588409736809588, + "grad_norm": 1.1194211733981758, + "learning_rate": 1.6038612687020548e-05, + "loss": 0.32039204239845276, + "step": 1333 + }, + { + "epoch": 0.659335227974793, + "grad_norm": 1.189072572166891, + "learning_rate": 1.6032097979473203e-05, + "loss": 0.3376410901546478, + "step": 1334 + }, + { + "epoch": 0.6598294822686273, + "grad_norm": 1.0209465387535948, + "learning_rate": 1.6025579244983443e-05, + "loss": 0.28432029485702515, + "step": 1335 + }, + { + "epoch": 0.6603237365624613, + "grad_norm": 1.1101085579973957, + "learning_rate": 1.6019056487903067e-05, + "loss": 0.3349001109600067, + "step": 1336 + }, + { + "epoch": 0.6608179908562956, + "grad_norm": 1.016991018325495, + "learning_rate": 1.601252971258658e-05, + "loss": 0.27995598316192627, + "step": 1337 + }, + { + "epoch": 0.6613122451501298, + "grad_norm": 1.0652875110729838, + "learning_rate": 1.6005998923391172e-05, + "loss": 0.28326892852783203, + "step": 1338 + }, + { + "epoch": 0.661806499443964, + "grad_norm": 1.1089400050162956, + "learning_rate": 1.5999464124676697e-05, + "loss": 0.3139200806617737, + "step": 1339 + }, + { + "epoch": 0.6623007537377981, + "grad_norm": 1.0857703956199403, + "learning_rate": 1.5992925320805688e-05, + "loss": 0.32395505905151367, + "step": 1340 + }, + { 
+ "epoch": 0.6627950080316323, + "grad_norm": 1.187400707476865, + "learning_rate": 1.598638251614337e-05, + "loss": 0.35880255699157715, + "step": 1341 + }, + { + "epoch": 0.6632892623254665, + "grad_norm": 1.1264632686384342, + "learning_rate": 1.5979835715057616e-05, + "loss": 0.3696775436401367, + "step": 1342 + }, + { + "epoch": 0.6637835166193007, + "grad_norm": 1.2084738763641774, + "learning_rate": 1.597328492191898e-05, + "loss": 0.38413193821907043, + "step": 1343 + }, + { + "epoch": 0.6642777709131348, + "grad_norm": 2.0572947223290017, + "learning_rate": 1.596673014110068e-05, + "loss": 0.3564830720424652, + "step": 1344 + }, + { + "epoch": 0.664772025206969, + "grad_norm": 1.0170026931569898, + "learning_rate": 1.5960171376978587e-05, + "loss": 0.30634552240371704, + "step": 1345 + }, + { + "epoch": 0.6652662795008032, + "grad_norm": 1.0375692111937291, + "learning_rate": 1.595360863393125e-05, + "loss": 0.27113068103790283, + "step": 1346 + }, + { + "epoch": 0.6657605337946373, + "grad_norm": 1.242773829739391, + "learning_rate": 1.594704191633985e-05, + "loss": 0.34015512466430664, + "step": 1347 + }, + { + "epoch": 0.6662547880884715, + "grad_norm": 0.9724222230737607, + "learning_rate": 1.594047122858824e-05, + "loss": 0.2509229779243469, + "step": 1348 + }, + { + "epoch": 0.6667490423823057, + "grad_norm": 1.0705371704599513, + "learning_rate": 1.5933896575062922e-05, + "loss": 0.35122111439704895, + "step": 1349 + }, + { + "epoch": 0.6672432966761399, + "grad_norm": 1.0469402955634624, + "learning_rate": 1.592731796015303e-05, + "loss": 0.3656314015388489, + "step": 1350 + }, + { + "epoch": 0.667737550969974, + "grad_norm": 1.0980190562444532, + "learning_rate": 1.5920735388250363e-05, + "loss": 0.3482551574707031, + "step": 1351 + }, + { + "epoch": 0.6682318052638082, + "grad_norm": 0.9987728958846398, + "learning_rate": 1.5914148863749344e-05, + "loss": 0.2852175831794739, + "step": 1352 + }, + { + "epoch": 0.6687260595576424, + "grad_norm": 
1.1231968462948256, + "learning_rate": 1.590755839104705e-05, + "loss": 0.3435940742492676, + "step": 1353 + }, + { + "epoch": 0.6692203138514766, + "grad_norm": 1.2334019463480403, + "learning_rate": 1.590096397454318e-05, + "loss": 0.34816527366638184, + "step": 1354 + }, + { + "epoch": 0.6697145681453107, + "grad_norm": 1.4472355399081582, + "learning_rate": 1.5894365618640077e-05, + "loss": 0.3283170461654663, + "step": 1355 + }, + { + "epoch": 0.6702088224391449, + "grad_norm": 1.1520168978191874, + "learning_rate": 1.588776332774271e-05, + "loss": 0.335905522108078, + "step": 1356 + }, + { + "epoch": 0.6707030767329791, + "grad_norm": 1.1244736910598108, + "learning_rate": 1.5881157106258666e-05, + "loss": 0.3055316209793091, + "step": 1357 + }, + { + "epoch": 0.6711973310268133, + "grad_norm": 1.050666765324263, + "learning_rate": 1.5874546958598172e-05, + "loss": 0.2873142659664154, + "step": 1358 + }, + { + "epoch": 0.6716915853206474, + "grad_norm": 1.0218331884680711, + "learning_rate": 1.586793288917406e-05, + "loss": 0.29659712314605713, + "step": 1359 + }, + { + "epoch": 0.6721858396144816, + "grad_norm": 1.0827802259474617, + "learning_rate": 1.5861314902401802e-05, + "loss": 0.33081990480422974, + "step": 1360 + }, + { + "epoch": 0.6726800939083158, + "grad_norm": 1.2140107638410536, + "learning_rate": 1.5854693002699457e-05, + "loss": 0.3559015691280365, + "step": 1361 + }, + { + "epoch": 0.67317434820215, + "grad_norm": 1.1424828520826207, + "learning_rate": 1.584806719448772e-05, + "loss": 0.3353438973426819, + "step": 1362 + }, + { + "epoch": 0.6736686024959841, + "grad_norm": 1.0533009951881467, + "learning_rate": 1.5841437482189882e-05, + "loss": 0.3320685923099518, + "step": 1363 + }, + { + "epoch": 0.6741628567898184, + "grad_norm": 1.0600254033440624, + "learning_rate": 1.5834803870231846e-05, + "loss": 0.3070179224014282, + "step": 1364 + }, + { + "epoch": 0.6746571110836526, + "grad_norm": 1.0452219544938475, + "learning_rate": 
1.5828166363042115e-05, + "loss": 0.28779780864715576, + "step": 1365 + }, + { + "epoch": 0.6751513653774868, + "grad_norm": 0.9932658974656241, + "learning_rate": 1.5821524965051793e-05, + "loss": 0.2793114185333252, + "step": 1366 + }, + { + "epoch": 0.6756456196713209, + "grad_norm": 1.117744874079583, + "learning_rate": 1.5814879680694585e-05, + "loss": 0.3586357831954956, + "step": 1367 + }, + { + "epoch": 0.6761398739651551, + "grad_norm": 1.122494918770383, + "learning_rate": 1.5808230514406786e-05, + "loss": 0.35258832573890686, + "step": 1368 + }, + { + "epoch": 0.6766341282589893, + "grad_norm": 1.0624893424167818, + "learning_rate": 1.5801577470627286e-05, + "loss": 0.2783607840538025, + "step": 1369 + }, + { + "epoch": 0.6771283825528235, + "grad_norm": 1.217710803865883, + "learning_rate": 1.579492055379756e-05, + "loss": 0.3494858741760254, + "step": 1370 + }, + { + "epoch": 0.6776226368466576, + "grad_norm": 1.1913846811426898, + "learning_rate": 1.578825976836167e-05, + "loss": 0.34512561559677124, + "step": 1371 + }, + { + "epoch": 0.6781168911404918, + "grad_norm": 1.0303182849177774, + "learning_rate": 1.5781595118766265e-05, + "loss": 0.2923341989517212, + "step": 1372 + }, + { + "epoch": 0.678611145434326, + "grad_norm": 1.0423481220482165, + "learning_rate": 1.5774926609460566e-05, + "loss": 0.3078833818435669, + "step": 1373 + }, + { + "epoch": 0.6791053997281601, + "grad_norm": 1.0871141007271816, + "learning_rate": 1.576825424489638e-05, + "loss": 0.3147008419036865, + "step": 1374 + }, + { + "epoch": 0.6795996540219943, + "grad_norm": 1.0340836184197277, + "learning_rate": 1.576157802952807e-05, + "loss": 0.2907789349555969, + "step": 1375 + }, + { + "epoch": 0.6800939083158285, + "grad_norm": 1.1801114991913197, + "learning_rate": 1.57548979678126e-05, + "loss": 0.2941555976867676, + "step": 1376 + }, + { + "epoch": 0.6805881626096627, + "grad_norm": 1.137398706652914, + "learning_rate": 1.5748214064209473e-05, + "loss": 
0.3452342748641968, + "step": 1377 + }, + { + "epoch": 0.6810824169034968, + "grad_norm": 0.9870368606552603, + "learning_rate": 1.5741526323180765e-05, + "loss": 0.31481361389160156, + "step": 1378 + }, + { + "epoch": 0.681576671197331, + "grad_norm": 1.1734004344416635, + "learning_rate": 1.573483474919112e-05, + "loss": 0.3403349220752716, + "step": 1379 + }, + { + "epoch": 0.6820709254911652, + "grad_norm": 1.3661262290783491, + "learning_rate": 1.572813934670774e-05, + "loss": 0.3283364176750183, + "step": 1380 + }, + { + "epoch": 0.6825651797849994, + "grad_norm": 1.0790334315781973, + "learning_rate": 1.5721440120200376e-05, + "loss": 0.3294883966445923, + "step": 1381 + }, + { + "epoch": 0.6830594340788335, + "grad_norm": 1.057215667272423, + "learning_rate": 1.5714737074141338e-05, + "loss": 0.3087981343269348, + "step": 1382 + }, + { + "epoch": 0.6835536883726677, + "grad_norm": 0.9953380542206125, + "learning_rate": 1.570803021300548e-05, + "loss": 0.29511693120002747, + "step": 1383 + }, + { + "epoch": 0.6840479426665019, + "grad_norm": 1.1147415286539601, + "learning_rate": 1.570131954127021e-05, + "loss": 0.3620823323726654, + "step": 1384 + }, + { + "epoch": 0.6845421969603361, + "grad_norm": 1.2518358127130127, + "learning_rate": 1.5694605063415477e-05, + "loss": 0.3978300988674164, + "step": 1385 + }, + { + "epoch": 0.6850364512541702, + "grad_norm": 1.2104388988265296, + "learning_rate": 1.5687886783923773e-05, + "loss": 0.35367661714553833, + "step": 1386 + }, + { + "epoch": 0.6855307055480044, + "grad_norm": 1.158470270474232, + "learning_rate": 1.5681164707280117e-05, + "loss": 0.3313448131084442, + "step": 1387 + }, + { + "epoch": 0.6860249598418386, + "grad_norm": 1.1312206183637163, + "learning_rate": 1.5674438837972077e-05, + "loss": 0.34115713834762573, + "step": 1388 + }, + { + "epoch": 0.6865192141356729, + "grad_norm": 1.071906380475402, + "learning_rate": 1.566770918048975e-05, + "loss": 0.311326265335083, + "step": 1389 + }, + { + 
"epoch": 0.687013468429507, + "grad_norm": 1.0496646406815568, + "learning_rate": 1.5660975739325755e-05, + "loss": 0.32622700929641724, + "step": 1390 + }, + { + "epoch": 0.6875077227233412, + "grad_norm": 1.1530479303397307, + "learning_rate": 1.565423851897524e-05, + "loss": 0.36029747128486633, + "step": 1391 + }, + { + "epoch": 0.6880019770171754, + "grad_norm": 0.9691306195768644, + "learning_rate": 1.5647497523935883e-05, + "loss": 0.2771177291870117, + "step": 1392 + }, + { + "epoch": 0.6884962313110096, + "grad_norm": 1.1450942478438548, + "learning_rate": 1.5640752758707868e-05, + "loss": 0.3474002182483673, + "step": 1393 + }, + { + "epoch": 0.6889904856048437, + "grad_norm": 1.09850595363495, + "learning_rate": 1.563400422779391e-05, + "loss": 0.28006255626678467, + "step": 1394 + }, + { + "epoch": 0.6894847398986779, + "grad_norm": 1.0953635794573913, + "learning_rate": 1.562725193569923e-05, + "loss": 0.32151490449905396, + "step": 1395 + }, + { + "epoch": 0.6899789941925121, + "grad_norm": 1.1995785901348681, + "learning_rate": 1.5620495886931557e-05, + "loss": 0.3081187903881073, + "step": 1396 + }, + { + "epoch": 0.6904732484863463, + "grad_norm": 1.1390576796125735, + "learning_rate": 1.561373608600114e-05, + "loss": 0.3158992826938629, + "step": 1397 + }, + { + "epoch": 0.6909675027801804, + "grad_norm": 1.1783652693752096, + "learning_rate": 1.5606972537420723e-05, + "loss": 0.33790335059165955, + "step": 1398 + }, + { + "epoch": 0.6914617570740146, + "grad_norm": 1.1733705340509706, + "learning_rate": 1.5600205245705553e-05, + "loss": 0.3157292902469635, + "step": 1399 + }, + { + "epoch": 0.6919560113678488, + "grad_norm": 1.1674234642263648, + "learning_rate": 1.559343421537338e-05, + "loss": 0.31090572476387024, + "step": 1400 + }, + { + "epoch": 0.6924502656616829, + "grad_norm": 1.1604041250760992, + "learning_rate": 1.5586659450944443e-05, + "loss": 0.30499958992004395, + "step": 1401 + }, + { + "epoch": 0.6929445199555171, + "grad_norm": 
1.0713722972416724, + "learning_rate": 1.5579880956941478e-05, + "loss": 0.3036794662475586, + "step": 1402 + }, + { + "epoch": 0.6934387742493513, + "grad_norm": 1.1543376848490539, + "learning_rate": 1.5573098737889716e-05, + "loss": 0.26514700055122375, + "step": 1403 + }, + { + "epoch": 0.6939330285431855, + "grad_norm": 1.0755683699565965, + "learning_rate": 1.5566312798316867e-05, + "loss": 0.31947457790374756, + "step": 1404 + }, + { + "epoch": 0.6944272828370196, + "grad_norm": 1.1317886658483896, + "learning_rate": 1.5559523142753124e-05, + "loss": 0.29387322068214417, + "step": 1405 + }, + { + "epoch": 0.6949215371308538, + "grad_norm": 1.117372828260635, + "learning_rate": 1.555272977573117e-05, + "loss": 0.33459946513175964, + "step": 1406 + }, + { + "epoch": 0.695415791424688, + "grad_norm": 1.2196871082649428, + "learning_rate": 1.5545932701786154e-05, + "loss": 0.31394320726394653, + "step": 1407 + }, + { + "epoch": 0.6959100457185222, + "grad_norm": 1.0669033993360486, + "learning_rate": 1.5539131925455713e-05, + "loss": 0.2891885042190552, + "step": 1408 + }, + { + "epoch": 0.6964043000123563, + "grad_norm": 1.2475463319045528, + "learning_rate": 1.5532327451279938e-05, + "loss": 0.33686599135398865, + "step": 1409 + }, + { + "epoch": 0.6968985543061905, + "grad_norm": 1.0648029492831064, + "learning_rate": 1.5525519283801405e-05, + "loss": 0.31463146209716797, + "step": 1410 + }, + { + "epoch": 0.6973928086000247, + "grad_norm": 1.226099759538899, + "learning_rate": 1.5518707427565146e-05, + "loss": 0.3598940372467041, + "step": 1411 + }, + { + "epoch": 0.6978870628938589, + "grad_norm": 1.149083094787804, + "learning_rate": 1.5511891887118665e-05, + "loss": 0.32980066537857056, + "step": 1412 + }, + { + "epoch": 0.698381317187693, + "grad_norm": 1.1872142618250514, + "learning_rate": 1.5505072667011915e-05, + "loss": 0.3264961242675781, + "step": 1413 + }, + { + "epoch": 0.6988755714815272, + "grad_norm": 1.0604770012284015, + "learning_rate": 
1.549824977179731e-05, + "loss": 0.3355519771575928, + "step": 1414 + }, + { + "epoch": 0.6993698257753614, + "grad_norm": 1.0119765938601295, + "learning_rate": 1.5491423206029717e-05, + "loss": 0.27073174715042114, + "step": 1415 + }, + { + "epoch": 0.6998640800691956, + "grad_norm": 1.1356545279602395, + "learning_rate": 1.5484592974266456e-05, + "loss": 0.32638323307037354, + "step": 1416 + }, + { + "epoch": 0.7003583343630297, + "grad_norm": 1.192307972564017, + "learning_rate": 1.5477759081067288e-05, + "loss": 0.38844019174575806, + "step": 1417 + }, + { + "epoch": 0.700852588656864, + "grad_norm": 1.1060104448967631, + "learning_rate": 1.5470921530994426e-05, + "loss": 0.3386498689651489, + "step": 1418 + }, + { + "epoch": 0.7013468429506982, + "grad_norm": 1.113333245203903, + "learning_rate": 1.5464080328612522e-05, + "loss": 0.3304392993450165, + "step": 1419 + }, + { + "epoch": 0.7018410972445324, + "grad_norm": 1.1024158772042199, + "learning_rate": 1.545723547848866e-05, + "loss": 0.314837247133255, + "step": 1420 + }, + { + "epoch": 0.7023353515383665, + "grad_norm": 0.9888192419219921, + "learning_rate": 1.5450386985192368e-05, + "loss": 0.30135127902030945, + "step": 1421 + }, + { + "epoch": 0.7028296058322007, + "grad_norm": 1.0640354824874358, + "learning_rate": 1.5443534853295602e-05, + "loss": 0.29176798462867737, + "step": 1422 + }, + { + "epoch": 0.7033238601260349, + "grad_norm": 1.3021824252266967, + "learning_rate": 1.5436679087372746e-05, + "loss": 0.36438125371932983, + "step": 1423 + }, + { + "epoch": 0.703818114419869, + "grad_norm": 1.1147780995478658, + "learning_rate": 1.542981969200061e-05, + "loss": 0.37140434980392456, + "step": 1424 + }, + { + "epoch": 0.7043123687137032, + "grad_norm": 1.3176538326023695, + "learning_rate": 1.542295667175843e-05, + "loss": 0.36072903871536255, + "step": 1425 + }, + { + "epoch": 0.7048066230075374, + "grad_norm": 1.1262882885574772, + "learning_rate": 1.5416090031227868e-05, + "loss": 
0.3266327977180481, + "step": 1426 + }, + { + "epoch": 0.7053008773013716, + "grad_norm": 1.0179565917308762, + "learning_rate": 1.5409219774992978e-05, + "loss": 0.3081423342227936, + "step": 1427 + }, + { + "epoch": 0.7057951315952057, + "grad_norm": 1.3034313694807904, + "learning_rate": 1.5402345907640262e-05, + "loss": 0.3571197986602783, + "step": 1428 + }, + { + "epoch": 0.7062893858890399, + "grad_norm": 1.1385888315844002, + "learning_rate": 1.5395468433758604e-05, + "loss": 0.32380104064941406, + "step": 1429 + }, + { + "epoch": 0.7067836401828741, + "grad_norm": 1.0129718670355197, + "learning_rate": 1.5388587357939313e-05, + "loss": 0.33777546882629395, + "step": 1430 + }, + { + "epoch": 0.7072778944767083, + "grad_norm": 1.0997780610685683, + "learning_rate": 1.5381702684776093e-05, + "loss": 0.31793370842933655, + "step": 1431 + }, + { + "epoch": 0.7077721487705424, + "grad_norm": 1.065324744616134, + "learning_rate": 1.537481441886506e-05, + "loss": 0.3282355070114136, + "step": 1432 + }, + { + "epoch": 0.7082664030643766, + "grad_norm": 1.1740655706878367, + "learning_rate": 1.5367922564804716e-05, + "loss": 0.3523057699203491, + "step": 1433 + }, + { + "epoch": 0.7087606573582108, + "grad_norm": 1.1790295388685894, + "learning_rate": 1.5361027127195964e-05, + "loss": 0.36351460218429565, + "step": 1434 + }, + { + "epoch": 0.709254911652045, + "grad_norm": 2.2339320260763373, + "learning_rate": 1.5354128110642102e-05, + "loss": 0.2936401963233948, + "step": 1435 + }, + { + "epoch": 0.7097491659458791, + "grad_norm": 1.1080576186798932, + "learning_rate": 1.5347225519748818e-05, + "loss": 0.3178175091743469, + "step": 1436 + }, + { + "epoch": 0.7102434202397133, + "grad_norm": 1.1375761171495609, + "learning_rate": 1.5340319359124177e-05, + "loss": 0.3098832666873932, + "step": 1437 + }, + { + "epoch": 0.7107376745335475, + "grad_norm": 0.951807024133746, + "learning_rate": 1.5333409633378633e-05, + "loss": 0.2644941806793213, + "step": 1438 + }, + { 
+ "epoch": 0.7112319288273817, + "grad_norm": 1.1193499530101132, + "learning_rate": 1.5326496347125027e-05, + "loss": 0.3046286702156067, + "step": 1439 + }, + { + "epoch": 0.7117261831212158, + "grad_norm": 1.1009971048909013, + "learning_rate": 1.5319579504978567e-05, + "loss": 0.33757925033569336, + "step": 1440 + }, + { + "epoch": 0.71222043741505, + "grad_norm": 1.1415644120008137, + "learning_rate": 1.5312659111556832e-05, + "loss": 0.3470202684402466, + "step": 1441 + }, + { + "epoch": 0.7127146917088842, + "grad_norm": 1.0829483976260892, + "learning_rate": 1.5305735171479785e-05, + "loss": 0.3310868740081787, + "step": 1442 + }, + { + "epoch": 0.7132089460027184, + "grad_norm": 1.2738694792524405, + "learning_rate": 1.529880768936975e-05, + "loss": 0.31649407744407654, + "step": 1443 + }, + { + "epoch": 0.7137032002965525, + "grad_norm": 1.0510301649062292, + "learning_rate": 1.5291876669851408e-05, + "loss": 0.2986135184764862, + "step": 1444 + }, + { + "epoch": 0.7141974545903867, + "grad_norm": 1.1622525691797543, + "learning_rate": 1.5284942117551817e-05, + "loss": 0.3033408224582672, + "step": 1445 + }, + { + "epoch": 0.714691708884221, + "grad_norm": 1.1648719329133883, + "learning_rate": 1.5278004037100378e-05, + "loss": 0.34231680631637573, + "step": 1446 + }, + { + "epoch": 0.7151859631780552, + "grad_norm": 1.1347301204641653, + "learning_rate": 1.5271062433128857e-05, + "loss": 0.3273579478263855, + "step": 1447 + }, + { + "epoch": 0.7156802174718893, + "grad_norm": 1.2307292916383785, + "learning_rate": 1.5264117310271372e-05, + "loss": 0.344064861536026, + "step": 1448 + }, + { + "epoch": 0.7161744717657235, + "grad_norm": 1.0685505855741966, + "learning_rate": 1.5257168673164384e-05, + "loss": 0.3131038546562195, + "step": 1449 + }, + { + "epoch": 0.7166687260595577, + "grad_norm": 1.1403948273488542, + "learning_rate": 1.5250216526446708e-05, + "loss": 0.32794755697250366, + "step": 1450 + }, + { + "epoch": 0.7171629803533918, + 
"grad_norm": 1.2597097116316462, + "learning_rate": 1.5243260874759494e-05, + "loss": 0.3633842468261719, + "step": 1451 + }, + { + "epoch": 0.717657234647226, + "grad_norm": 0.943013995379639, + "learning_rate": 1.5236301722746235e-05, + "loss": 0.24650251865386963, + "step": 1452 + }, + { + "epoch": 0.7181514889410602, + "grad_norm": 1.1777840335640666, + "learning_rate": 1.5229339075052769e-05, + "loss": 0.34167230129241943, + "step": 1453 + }, + { + "epoch": 0.7186457432348944, + "grad_norm": 1.0945051908887762, + "learning_rate": 1.522237293632725e-05, + "loss": 0.29454126954078674, + "step": 1454 + }, + { + "epoch": 0.7191399975287285, + "grad_norm": 1.1517995676673816, + "learning_rate": 1.5215403311220178e-05, + "loss": 0.3709314465522766, + "step": 1455 + }, + { + "epoch": 0.7196342518225627, + "grad_norm": 1.1421076533752808, + "learning_rate": 1.5208430204384377e-05, + "loss": 0.3543916642665863, + "step": 1456 + }, + { + "epoch": 0.7201285061163969, + "grad_norm": 1.1924648010793302, + "learning_rate": 1.5201453620474986e-05, + "loss": 0.33827707171440125, + "step": 1457 + }, + { + "epoch": 0.7206227604102311, + "grad_norm": 1.1616070041381745, + "learning_rate": 1.5194473564149484e-05, + "loss": 0.31289514899253845, + "step": 1458 + }, + { + "epoch": 0.7211170147040652, + "grad_norm": 1.1655875507968474, + "learning_rate": 1.5187490040067646e-05, + "loss": 0.3345657289028168, + "step": 1459 + }, + { + "epoch": 0.7216112689978994, + "grad_norm": 1.091971369166992, + "learning_rate": 1.5180503052891578e-05, + "loss": 0.3322404623031616, + "step": 1460 + }, + { + "epoch": 0.7221055232917336, + "grad_norm": 1.0009476128919939, + "learning_rate": 1.5173512607285692e-05, + "loss": 0.31120461225509644, + "step": 1461 + }, + { + "epoch": 0.7225997775855678, + "grad_norm": 1.140979323325151, + "learning_rate": 1.5166518707916714e-05, + "loss": 0.3388645648956299, + "step": 1462 + }, + { + "epoch": 0.7230940318794019, + "grad_norm": 1.098469502784105, + 
"learning_rate": 1.5159521359453661e-05, + "loss": 0.3048557639122009, + "step": 1463 + }, + { + "epoch": 0.7235882861732361, + "grad_norm": 1.0437743408474436, + "learning_rate": 1.5152520566567873e-05, + "loss": 0.32128047943115234, + "step": 1464 + }, + { + "epoch": 0.7240825404670703, + "grad_norm": 1.0754519434907805, + "learning_rate": 1.5145516333932973e-05, + "loss": 0.3016900420188904, + "step": 1465 + }, + { + "epoch": 0.7245767947609045, + "grad_norm": 0.9730419604339762, + "learning_rate": 1.5138508666224892e-05, + "loss": 0.27410340309143066, + "step": 1466 + }, + { + "epoch": 0.7250710490547386, + "grad_norm": 1.1548137674896846, + "learning_rate": 1.513149756812184e-05, + "loss": 0.314311146736145, + "step": 1467 + }, + { + "epoch": 0.7255653033485728, + "grad_norm": 1.0652992161056178, + "learning_rate": 1.5124483044304339e-05, + "loss": 0.300488144159317, + "step": 1468 + }, + { + "epoch": 0.726059557642407, + "grad_norm": 1.0437811199768454, + "learning_rate": 1.5117465099455173e-05, + "loss": 0.2610424757003784, + "step": 1469 + }, + { + "epoch": 0.7265538119362412, + "grad_norm": 1.0473843452456588, + "learning_rate": 1.5110443738259425e-05, + "loss": 0.2631368637084961, + "step": 1470 + }, + { + "epoch": 0.7270480662300753, + "grad_norm": 1.1572872923696271, + "learning_rate": 1.510341896540446e-05, + "loss": 0.2894716262817383, + "step": 1471 + }, + { + "epoch": 0.7275423205239095, + "grad_norm": 1.1539682565039295, + "learning_rate": 1.5096390785579913e-05, + "loss": 0.2859206199645996, + "step": 1472 + }, + { + "epoch": 0.7280365748177438, + "grad_norm": 1.1861776477785995, + "learning_rate": 1.5089359203477693e-05, + "loss": 0.2966008484363556, + "step": 1473 + }, + { + "epoch": 0.728530829111578, + "grad_norm": 1.0911088494470613, + "learning_rate": 1.5082324223791988e-05, + "loss": 0.3187675476074219, + "step": 1474 + }, + { + "epoch": 0.729025083405412, + "grad_norm": 1.1920802680772398, + "learning_rate": 1.507528585121925e-05, + 
"loss": 0.32434171438217163, + "step": 1475 + }, + { + "epoch": 0.7295193376992463, + "grad_norm": 1.233732485912319, + "learning_rate": 1.5068244090458197e-05, + "loss": 0.3518364429473877, + "step": 1476 + }, + { + "epoch": 0.7300135919930805, + "grad_norm": 1.091189612496036, + "learning_rate": 1.50611989462098e-05, + "loss": 0.32294291257858276, + "step": 1477 + }, + { + "epoch": 0.7305078462869146, + "grad_norm": 1.184027940449126, + "learning_rate": 1.5054150423177307e-05, + "loss": 0.3413415253162384, + "step": 1478 + }, + { + "epoch": 0.7310021005807488, + "grad_norm": 1.1760745568840743, + "learning_rate": 1.5047098526066207e-05, + "loss": 0.3562566637992859, + "step": 1479 + }, + { + "epoch": 0.731496354874583, + "grad_norm": 1.130494844464842, + "learning_rate": 1.504004325958424e-05, + "loss": 0.30018410086631775, + "step": 1480 + }, + { + "epoch": 0.7319906091684172, + "grad_norm": 1.027268124102698, + "learning_rate": 1.5032984628441409e-05, + "loss": 0.2937701344490051, + "step": 1481 + }, + { + "epoch": 0.7324848634622513, + "grad_norm": 1.131154387943882, + "learning_rate": 1.5025922637349953e-05, + "loss": 0.3268740773200989, + "step": 1482 + }, + { + "epoch": 0.7329791177560855, + "grad_norm": 1.053089747814938, + "learning_rate": 1.5018857291024356e-05, + "loss": 0.3246314525604248, + "step": 1483 + }, + { + "epoch": 0.7334733720499197, + "grad_norm": 1.033026683314433, + "learning_rate": 1.501178859418134e-05, + "loss": 0.276904433965683, + "step": 1484 + }, + { + "epoch": 0.7339676263437539, + "grad_norm": 1.1901915790154476, + "learning_rate": 1.5004716551539873e-05, + "loss": 0.27665287256240845, + "step": 1485 + }, + { + "epoch": 0.734461880637588, + "grad_norm": 1.065690181516995, + "learning_rate": 1.4997641167821143e-05, + "loss": 0.325985848903656, + "step": 1486 + }, + { + "epoch": 0.7349561349314222, + "grad_norm": 1.2333398180696593, + "learning_rate": 1.4990562447748573e-05, + "loss": 0.2951817214488983, + "step": 1487 + }, + { + 
"epoch": 0.7354503892252564, + "grad_norm": 1.0415622998394476, + "learning_rate": 1.4983480396047822e-05, + "loss": 0.2592772841453552, + "step": 1488 + }, + { + "epoch": 0.7359446435190906, + "grad_norm": 1.0977128928049222, + "learning_rate": 1.4976395017446767e-05, + "loss": 0.3278253674507141, + "step": 1489 + }, + { + "epoch": 0.7364388978129247, + "grad_norm": 2.4840016288238886, + "learning_rate": 1.4969306316675497e-05, + "loss": 0.32366445660591125, + "step": 1490 + }, + { + "epoch": 0.7369331521067589, + "grad_norm": 1.065618785924185, + "learning_rate": 1.4962214298466337e-05, + "loss": 0.30544513463974, + "step": 1491 + }, + { + "epoch": 0.7374274064005931, + "grad_norm": 1.1151764286390358, + "learning_rate": 1.4955118967553812e-05, + "loss": 0.3712898790836334, + "step": 1492 + }, + { + "epoch": 0.7379216606944273, + "grad_norm": 1.072095940180716, + "learning_rate": 1.4948020328674662e-05, + "loss": 0.3006438612937927, + "step": 1493 + }, + { + "epoch": 0.7384159149882614, + "grad_norm": 1.1145573413296936, + "learning_rate": 1.494091838656784e-05, + "loss": 0.3494953215122223, + "step": 1494 + }, + { + "epoch": 0.7389101692820956, + "grad_norm": 1.091824613740768, + "learning_rate": 1.4933813145974504e-05, + "loss": 0.2698785662651062, + "step": 1495 + }, + { + "epoch": 0.7394044235759298, + "grad_norm": 1.1072713673032075, + "learning_rate": 1.4926704611638003e-05, + "loss": 0.34775635600090027, + "step": 1496 + }, + { + "epoch": 0.739898677869764, + "grad_norm": 1.1542085278706422, + "learning_rate": 1.4919592788303898e-05, + "loss": 0.328175812959671, + "step": 1497 + }, + { + "epoch": 0.7403929321635981, + "grad_norm": 1.1735161292651393, + "learning_rate": 1.491247768071994e-05, + "loss": 0.3320178687572479, + "step": 1498 + }, + { + "epoch": 0.7408871864574323, + "grad_norm": 1.8687355330582882, + "learning_rate": 1.4905359293636074e-05, + "loss": 0.308150053024292, + "step": 1499 + }, + { + "epoch": 0.7413814407512666, + "grad_norm": 
1.1422704685641505, + "learning_rate": 1.489823763180443e-05, + "loss": 0.3311570882797241, + "step": 1500 + }, + { + "epoch": 0.7418756950451008, + "grad_norm": 1.2844910379105308, + "learning_rate": 1.4891112699979334e-05, + "loss": 0.36916327476501465, + "step": 1501 + }, + { + "epoch": 0.7423699493389349, + "grad_norm": 1.0354244070195735, + "learning_rate": 1.4883984502917286e-05, + "loss": 0.28005337715148926, + "step": 1502 + }, + { + "epoch": 0.7428642036327691, + "grad_norm": 1.2241818166146565, + "learning_rate": 1.4876853045376962e-05, + "loss": 0.3502781391143799, + "step": 1503 + }, + { + "epoch": 0.7433584579266033, + "grad_norm": 1.2448349850537428, + "learning_rate": 1.4869718332119232e-05, + "loss": 0.32032880187034607, + "step": 1504 + }, + { + "epoch": 0.7438527122204374, + "grad_norm": 1.1236679189592251, + "learning_rate": 1.4862580367907118e-05, + "loss": 0.3229472041130066, + "step": 1505 + }, + { + "epoch": 0.7443469665142716, + "grad_norm": 1.087360074547477, + "learning_rate": 1.4855439157505833e-05, + "loss": 0.2725368142127991, + "step": 1506 + }, + { + "epoch": 0.7448412208081058, + "grad_norm": 1.2509876854452482, + "learning_rate": 1.4848294705682737e-05, + "loss": 0.35358861088752747, + "step": 1507 + }, + { + "epoch": 0.74533547510194, + "grad_norm": 1.0843196708603702, + "learning_rate": 1.4841147017207376e-05, + "loss": 0.299206018447876, + "step": 1508 + }, + { + "epoch": 0.7458297293957741, + "grad_norm": 2.7618594064377384, + "learning_rate": 1.4833996096851432e-05, + "loss": 0.32004314661026, + "step": 1509 + }, + { + "epoch": 0.7463239836896083, + "grad_norm": 1.1399779760270892, + "learning_rate": 1.4826841949388767e-05, + "loss": 0.32800590991973877, + "step": 1510 + }, + { + "epoch": 0.7468182379834425, + "grad_norm": 1.112132363505793, + "learning_rate": 1.4819684579595382e-05, + "loss": 0.2916460335254669, + "step": 1511 + }, + { + "epoch": 0.7473124922772767, + "grad_norm": 1.2041472096070427, + "learning_rate": 
1.4812523992249437e-05, + "loss": 0.3276118338108063, + "step": 1512 + }, + { + "epoch": 0.7478067465711108, + "grad_norm": 1.2310079375510266, + "learning_rate": 1.4805360192131234e-05, + "loss": 0.34718069434165955, + "step": 1513 + }, + { + "epoch": 0.748301000864945, + "grad_norm": 1.0130113878676084, + "learning_rate": 1.4798193184023233e-05, + "loss": 0.2810167372226715, + "step": 1514 + }, + { + "epoch": 0.7487952551587792, + "grad_norm": 1.1600230287701154, + "learning_rate": 1.4791022972710017e-05, + "loss": 0.3542296886444092, + "step": 1515 + }, + { + "epoch": 0.7492895094526134, + "grad_norm": 1.0717623685966582, + "learning_rate": 1.4783849562978319e-05, + "loss": 0.27578431367874146, + "step": 1516 + }, + { + "epoch": 0.7497837637464475, + "grad_norm": 1.2193919844014014, + "learning_rate": 1.4776672959617006e-05, + "loss": 0.32235798239707947, + "step": 1517 + }, + { + "epoch": 0.7502780180402817, + "grad_norm": 1.073591922439447, + "learning_rate": 1.4769493167417079e-05, + "loss": 0.30588477849960327, + "step": 1518 + }, + { + "epoch": 0.7507722723341159, + "grad_norm": 1.1259837125407774, + "learning_rate": 1.4762310191171657e-05, + "loss": 0.31242361664772034, + "step": 1519 + }, + { + "epoch": 0.7512665266279501, + "grad_norm": 1.2265290610094162, + "learning_rate": 1.4755124035675995e-05, + "loss": 0.3679526150226593, + "step": 1520 + }, + { + "epoch": 0.7517607809217842, + "grad_norm": 1.0185674037419847, + "learning_rate": 1.4747934705727473e-05, + "loss": 0.28588515520095825, + "step": 1521 + }, + { + "epoch": 0.7522550352156184, + "grad_norm": 1.0624456882482982, + "learning_rate": 1.4740742206125582e-05, + "loss": 0.29861775040626526, + "step": 1522 + }, + { + "epoch": 0.7527492895094526, + "grad_norm": 1.1245071890104912, + "learning_rate": 1.4733546541671928e-05, + "loss": 0.31373754143714905, + "step": 1523 + }, + { + "epoch": 0.7532435438032868, + "grad_norm": 1.1569601569555032, + "learning_rate": 1.472634771717024e-05, + "loss": 
0.3127061128616333, + "step": 1524 + }, + { + "epoch": 0.7537377980971209, + "grad_norm": 1.0554556810771654, + "learning_rate": 1.4719145737426346e-05, + "loss": 0.33681541681289673, + "step": 1525 + }, + { + "epoch": 0.7542320523909551, + "grad_norm": 1.1202634511050926, + "learning_rate": 1.4711940607248182e-05, + "loss": 0.30266639590263367, + "step": 1526 + }, + { + "epoch": 0.7547263066847893, + "grad_norm": 1.0915134711866425, + "learning_rate": 1.47047323314458e-05, + "loss": 0.2988300323486328, + "step": 1527 + }, + { + "epoch": 0.7552205609786236, + "grad_norm": 1.1041853232471737, + "learning_rate": 1.4697520914831334e-05, + "loss": 0.32679620385169983, + "step": 1528 + }, + { + "epoch": 0.7557148152724577, + "grad_norm": 1.0049846597819565, + "learning_rate": 1.4690306362219024e-05, + "loss": 0.2935605049133301, + "step": 1529 + }, + { + "epoch": 0.7562090695662919, + "grad_norm": 1.1114952379308272, + "learning_rate": 1.4683088678425204e-05, + "loss": 0.303417831659317, + "step": 1530 + }, + { + "epoch": 0.7567033238601261, + "grad_norm": 1.0605597139601082, + "learning_rate": 1.4675867868268295e-05, + "loss": 0.30822527408599854, + "step": 1531 + }, + { + "epoch": 0.7571975781539602, + "grad_norm": 1.0772522309630048, + "learning_rate": 1.4668643936568807e-05, + "loss": 0.3104674220085144, + "step": 1532 + }, + { + "epoch": 0.7576918324477944, + "grad_norm": 1.0598818436947175, + "learning_rate": 1.4661416888149333e-05, + "loss": 0.27899307012557983, + "step": 1533 + }, + { + "epoch": 0.7581860867416286, + "grad_norm": 1.1291791785743877, + "learning_rate": 1.465418672783455e-05, + "loss": 0.3285380005836487, + "step": 1534 + }, + { + "epoch": 0.7586803410354628, + "grad_norm": 1.0773746767557166, + "learning_rate": 1.4646953460451205e-05, + "loss": 0.32028889656066895, + "step": 1535 + }, + { + "epoch": 0.7591745953292969, + "grad_norm": 1.2647242329167074, + "learning_rate": 1.4639717090828127e-05, + "loss": 0.29870709776878357, + "step": 1536 + }, 
+ { + "epoch": 0.7596688496231311, + "grad_norm": 1.1833149129368068, + "learning_rate": 1.4632477623796216e-05, + "loss": 0.3556699752807617, + "step": 1537 + }, + { + "epoch": 0.7601631039169653, + "grad_norm": 1.158341046754784, + "learning_rate": 1.462523506418843e-05, + "loss": 0.3433789014816284, + "step": 1538 + }, + { + "epoch": 0.7606573582107995, + "grad_norm": 1.2526530715160118, + "learning_rate": 1.4617989416839802e-05, + "loss": 0.3146114945411682, + "step": 1539 + }, + { + "epoch": 0.7611516125046336, + "grad_norm": 1.0254798742920868, + "learning_rate": 1.4610740686587424e-05, + "loss": 0.29029202461242676, + "step": 1540 + }, + { + "epoch": 0.7616458667984678, + "grad_norm": 1.2367212561484746, + "learning_rate": 1.4603488878270442e-05, + "loss": 0.2976688742637634, + "step": 1541 + }, + { + "epoch": 0.762140121092302, + "grad_norm": 1.0384933941129642, + "learning_rate": 1.459623399673006e-05, + "loss": 0.28604352474212646, + "step": 1542 + }, + { + "epoch": 0.7626343753861362, + "grad_norm": 1.2070329997652125, + "learning_rate": 1.4588976046809536e-05, + "loss": 0.34977301955223083, + "step": 1543 + }, + { + "epoch": 0.7631286296799703, + "grad_norm": 1.1108699937366455, + "learning_rate": 1.458171503335417e-05, + "loss": 0.31592974066734314, + "step": 1544 + }, + { + "epoch": 0.7636228839738045, + "grad_norm": 1.224645404968216, + "learning_rate": 1.4574450961211312e-05, + "loss": 0.31539830565452576, + "step": 1545 + }, + { + "epoch": 0.7641171382676387, + "grad_norm": 1.1914008033212045, + "learning_rate": 1.4567183835230355e-05, + "loss": 0.3100752532482147, + "step": 1546 + }, + { + "epoch": 0.7646113925614729, + "grad_norm": 1.1973069016485758, + "learning_rate": 1.4559913660262726e-05, + "loss": 0.31005364656448364, + "step": 1547 + }, + { + "epoch": 0.765105646855307, + "grad_norm": 1.102020410207535, + "learning_rate": 1.4552640441161889e-05, + "loss": 0.3050577640533447, + "step": 1548 + }, + { + "epoch": 0.7655999011491412, + 
"grad_norm": 1.1151715417212549, + "learning_rate": 1.4545364182783343e-05, + "loss": 0.294721394777298, + "step": 1549 + }, + { + "epoch": 0.7660941554429754, + "grad_norm": 1.0907882805879732, + "learning_rate": 1.4538084889984616e-05, + "loss": 0.2974075376987457, + "step": 1550 + }, + { + "epoch": 0.7665884097368096, + "grad_norm": 1.218483256285566, + "learning_rate": 1.4530802567625259e-05, + "loss": 0.3247089385986328, + "step": 1551 + }, + { + "epoch": 0.7670826640306437, + "grad_norm": 1.1751647980540385, + "learning_rate": 1.4523517220566843e-05, + "loss": 0.3219151198863983, + "step": 1552 + }, + { + "epoch": 0.7675769183244779, + "grad_norm": 1.092743254793347, + "learning_rate": 1.4516228853672962e-05, + "loss": 0.30580246448516846, + "step": 1553 + }, + { + "epoch": 0.7680711726183121, + "grad_norm": 1.0670412493946726, + "learning_rate": 1.4508937471809233e-05, + "loss": 0.2983207702636719, + "step": 1554 + }, + { + "epoch": 0.7685654269121464, + "grad_norm": 1.1155603134808716, + "learning_rate": 1.4501643079843266e-05, + "loss": 0.3429039418697357, + "step": 1555 + }, + { + "epoch": 0.7690596812059804, + "grad_norm": 1.0600298870014666, + "learning_rate": 1.4494345682644704e-05, + "loss": 0.3055192530155182, + "step": 1556 + }, + { + "epoch": 0.7695539354998147, + "grad_norm": 1.0843598968647987, + "learning_rate": 1.4487045285085178e-05, + "loss": 0.2964102327823639, + "step": 1557 + }, + { + "epoch": 0.7700481897936489, + "grad_norm": 1.0436581793993642, + "learning_rate": 1.4479741892038335e-05, + "loss": 0.3088444471359253, + "step": 1558 + }, + { + "epoch": 0.770542444087483, + "grad_norm": 1.070280126063037, + "learning_rate": 1.4472435508379808e-05, + "loss": 0.28697890043258667, + "step": 1559 + }, + { + "epoch": 0.7710366983813172, + "grad_norm": 1.1055317673748768, + "learning_rate": 1.4465126138987242e-05, + "loss": 0.3664681315422058, + "step": 1560 + }, + { + "epoch": 0.7715309526751514, + "grad_norm": 1.1042702127280148, + 
"learning_rate": 1.4457813788740263e-05, + "loss": 0.3282932937145233, + "step": 1561 + }, + { + "epoch": 0.7720252069689856, + "grad_norm": 1.4857133307558297, + "learning_rate": 1.4450498462520495e-05, + "loss": 0.27597576379776, + "step": 1562 + }, + { + "epoch": 0.7725194612628197, + "grad_norm": 1.2214452597170176, + "learning_rate": 1.4443180165211541e-05, + "loss": 0.3553946614265442, + "step": 1563 + }, + { + "epoch": 0.7730137155566539, + "grad_norm": 1.1827716129984904, + "learning_rate": 1.4435858901698995e-05, + "loss": 0.36224859952926636, + "step": 1564 + }, + { + "epoch": 0.7735079698504881, + "grad_norm": 1.1341836510498036, + "learning_rate": 1.4428534676870427e-05, + "loss": 0.2940914034843445, + "step": 1565 + }, + { + "epoch": 0.7740022241443223, + "grad_norm": 0.9563512574257287, + "learning_rate": 1.4421207495615385e-05, + "loss": 0.2717741131782532, + "step": 1566 + }, + { + "epoch": 0.7744964784381564, + "grad_norm": 1.1327871067959112, + "learning_rate": 1.441387736282539e-05, + "loss": 0.32340431213378906, + "step": 1567 + }, + { + "epoch": 0.7749907327319906, + "grad_norm": 1.1090264087970254, + "learning_rate": 1.4406544283393935e-05, + "loss": 0.3080120086669922, + "step": 1568 + }, + { + "epoch": 0.7754849870258248, + "grad_norm": 1.4441577426158039, + "learning_rate": 1.4399208262216475e-05, + "loss": 0.3118380308151245, + "step": 1569 + }, + { + "epoch": 0.775979241319659, + "grad_norm": 1.3307213271784917, + "learning_rate": 1.439186930419044e-05, + "loss": 0.3086084723472595, + "step": 1570 + }, + { + "epoch": 0.7764734956134931, + "grad_norm": 1.1593176371811458, + "learning_rate": 1.438452741421521e-05, + "loss": 0.3233364522457123, + "step": 1571 + }, + { + "epoch": 0.7769677499073273, + "grad_norm": 1.0623874748102813, + "learning_rate": 1.4377182597192124e-05, + "loss": 0.29029640555381775, + "step": 1572 + }, + { + "epoch": 0.7774620042011615, + "grad_norm": 0.9791711244739897, + "learning_rate": 1.4369834858024476e-05, + 
"loss": 0.2888006567955017, + "step": 1573 + }, + { + "epoch": 0.7779562584949957, + "grad_norm": 1.1118016172702438, + "learning_rate": 1.4362484201617519e-05, + "loss": 0.3260151743888855, + "step": 1574 + }, + { + "epoch": 0.7784505127888298, + "grad_norm": 1.3306536044832058, + "learning_rate": 1.4355130632878439e-05, + "loss": 0.333207905292511, + "step": 1575 + }, + { + "epoch": 0.778944767082664, + "grad_norm": 1.0844273121477916, + "learning_rate": 1.4347774156716375e-05, + "loss": 0.2577935457229614, + "step": 1576 + }, + { + "epoch": 0.7794390213764982, + "grad_norm": 1.0777103823564191, + "learning_rate": 1.434041477804241e-05, + "loss": 0.29645979404449463, + "step": 1577 + }, + { + "epoch": 0.7799332756703324, + "grad_norm": 1.1743796307407597, + "learning_rate": 1.433305250176955e-05, + "loss": 0.2973156273365021, + "step": 1578 + }, + { + "epoch": 0.7804275299641665, + "grad_norm": 1.0277241805983874, + "learning_rate": 1.4325687332812754e-05, + "loss": 0.29159975051879883, + "step": 1579 + }, + { + "epoch": 0.7809217842580007, + "grad_norm": 1.1751334806332727, + "learning_rate": 1.4318319276088902e-05, + "loss": 0.29718664288520813, + "step": 1580 + }, + { + "epoch": 0.781416038551835, + "grad_norm": 1.316577919508971, + "learning_rate": 1.4310948336516803e-05, + "loss": 0.3262369632720947, + "step": 1581 + }, + { + "epoch": 0.781910292845669, + "grad_norm": 1.182680350644687, + "learning_rate": 1.4303574519017187e-05, + "loss": 0.36491623520851135, + "step": 1582 + }, + { + "epoch": 0.7824045471395032, + "grad_norm": 1.181580153295467, + "learning_rate": 1.4296197828512716e-05, + "loss": 0.3558582365512848, + "step": 1583 + }, + { + "epoch": 0.7828988014333375, + "grad_norm": 0.9802630700834107, + "learning_rate": 1.428881826992796e-05, + "loss": 0.2745930552482605, + "step": 1584 + }, + { + "epoch": 0.7833930557271717, + "grad_norm": 1.1668091765691224, + "learning_rate": 1.4281435848189404e-05, + "loss": 0.3239384889602661, + "step": 1585 + }, + 
{ + "epoch": 0.7838873100210058, + "grad_norm": 1.0164738185404556, + "learning_rate": 1.4274050568225452e-05, + "loss": 0.2708761692047119, + "step": 1586 + }, + { + "epoch": 0.78438156431484, + "grad_norm": 1.2356501028179845, + "learning_rate": 1.4266662434966412e-05, + "loss": 0.3633013963699341, + "step": 1587 + }, + { + "epoch": 0.7848758186086742, + "grad_norm": 1.2145151160613337, + "learning_rate": 1.425927145334449e-05, + "loss": 0.36411651968955994, + "step": 1588 + }, + { + "epoch": 0.7853700729025084, + "grad_norm": 1.2093753197442545, + "learning_rate": 1.4251877628293804e-05, + "loss": 0.3120966851711273, + "step": 1589 + }, + { + "epoch": 0.7858643271963425, + "grad_norm": 1.111474907013162, + "learning_rate": 1.4244480964750365e-05, + "loss": 0.32788634300231934, + "step": 1590 + }, + { + "epoch": 0.7863585814901767, + "grad_norm": 1.1320230499507122, + "learning_rate": 1.423708146765208e-05, + "loss": 0.2919159233570099, + "step": 1591 + }, + { + "epoch": 0.7868528357840109, + "grad_norm": 1.1271090926469096, + "learning_rate": 1.4229679141938749e-05, + "loss": 0.3135683834552765, + "step": 1592 + }, + { + "epoch": 0.7873470900778451, + "grad_norm": 1.2447784007425877, + "learning_rate": 1.4222273992552058e-05, + "loss": 0.351981520652771, + "step": 1593 + }, + { + "epoch": 0.7878413443716792, + "grad_norm": 1.1846979202846248, + "learning_rate": 1.4214866024435576e-05, + "loss": 0.3615785837173462, + "step": 1594 + }, + { + "epoch": 0.7883355986655134, + "grad_norm": 1.1632616021817466, + "learning_rate": 1.420745524253476e-05, + "loss": 0.29399484395980835, + "step": 1595 + }, + { + "epoch": 0.7888298529593476, + "grad_norm": 1.1714512606078011, + "learning_rate": 1.420004165179694e-05, + "loss": 0.30501872301101685, + "step": 1596 + }, + { + "epoch": 0.7893241072531818, + "grad_norm": 1.1172632404953093, + "learning_rate": 1.4192625257171331e-05, + "loss": 0.33745667338371277, + "step": 1597 + }, + { + "epoch": 0.7898183615470159, + 
"grad_norm": 0.994693525988225, + "learning_rate": 1.4185206063609e-05, + "loss": 0.2675662934780121, + "step": 1598 + }, + { + "epoch": 0.7903126158408501, + "grad_norm": 1.022107075414073, + "learning_rate": 1.41777840760629e-05, + "loss": 0.295659601688385, + "step": 1599 + }, + { + "epoch": 0.7908068701346843, + "grad_norm": 1.119079517603524, + "learning_rate": 1.4170359299487848e-05, + "loss": 0.3164275586605072, + "step": 1600 + }, + { + "epoch": 0.7913011244285185, + "grad_norm": 1.0695885495482724, + "learning_rate": 1.416293173884051e-05, + "loss": 0.3039100766181946, + "step": 1601 + }, + { + "epoch": 0.7917953787223526, + "grad_norm": 1.1080665801372258, + "learning_rate": 1.4155501399079427e-05, + "loss": 0.2994040846824646, + "step": 1602 + }, + { + "epoch": 0.7922896330161868, + "grad_norm": 1.3291271745996591, + "learning_rate": 1.4148068285164984e-05, + "loss": 0.3129369616508484, + "step": 1603 + }, + { + "epoch": 0.792783887310021, + "grad_norm": 1.084724718149673, + "learning_rate": 1.4140632402059424e-05, + "loss": 0.3223167657852173, + "step": 1604 + }, + { + "epoch": 0.7932781416038552, + "grad_norm": 1.0882285752839331, + "learning_rate": 1.4133193754726834e-05, + "loss": 0.2734811305999756, + "step": 1605 + }, + { + "epoch": 0.7937723958976893, + "grad_norm": 1.103029405529104, + "learning_rate": 1.4125752348133148e-05, + "loss": 0.27474087476730347, + "step": 1606 + }, + { + "epoch": 0.7942666501915235, + "grad_norm": 1.0487344928171054, + "learning_rate": 1.4118308187246145e-05, + "loss": 0.2619907557964325, + "step": 1607 + }, + { + "epoch": 0.7947609044853577, + "grad_norm": 1.1981880636142406, + "learning_rate": 1.411086127703544e-05, + "loss": 0.3176937699317932, + "step": 1608 + }, + { + "epoch": 0.7952551587791918, + "grad_norm": 1.12323060393325, + "learning_rate": 1.4103411622472483e-05, + "loss": 0.28044235706329346, + "step": 1609 + }, + { + "epoch": 0.795749413073026, + "grad_norm": 1.0360499320558048, + "learning_rate": 
1.409595922853056e-05, + "loss": 0.27778196334838867, + "step": 1610 + }, + { + "epoch": 0.7962436673668603, + "grad_norm": 1.2703211339383462, + "learning_rate": 1.4088504100184777e-05, + "loss": 0.3168628513813019, + "step": 1611 + }, + { + "epoch": 0.7967379216606945, + "grad_norm": 1.1557608708585085, + "learning_rate": 1.4081046242412075e-05, + "loss": 0.30454084277153015, + "step": 1612 + }, + { + "epoch": 0.7972321759545286, + "grad_norm": 1.169123128871501, + "learning_rate": 1.4073585660191214e-05, + "loss": 0.34019169211387634, + "step": 1613 + }, + { + "epoch": 0.7977264302483628, + "grad_norm": 1.173717391982327, + "learning_rate": 1.4066122358502772e-05, + "loss": 0.3044774830341339, + "step": 1614 + }, + { + "epoch": 0.798220684542197, + "grad_norm": 1.1570346377203322, + "learning_rate": 1.4058656342329136e-05, + "loss": 0.3181847333908081, + "step": 1615 + }, + { + "epoch": 0.7987149388360312, + "grad_norm": 1.249158616205248, + "learning_rate": 1.405118761665452e-05, + "loss": 0.3400845229625702, + "step": 1616 + }, + { + "epoch": 0.7992091931298653, + "grad_norm": 1.2103435711338524, + "learning_rate": 1.4043716186464935e-05, + "loss": 0.2845221161842346, + "step": 1617 + }, + { + "epoch": 0.7997034474236995, + "grad_norm": 1.060854004382088, + "learning_rate": 1.4036242056748202e-05, + "loss": 0.27315276861190796, + "step": 1618 + }, + { + "epoch": 0.8001977017175337, + "grad_norm": 1.2994888590220768, + "learning_rate": 1.4028765232493942e-05, + "loss": 0.3388780951499939, + "step": 1619 + }, + { + "epoch": 0.8006919560113679, + "grad_norm": 1.282329812705599, + "learning_rate": 1.4021285718693581e-05, + "loss": 0.338635116815567, + "step": 1620 + }, + { + "epoch": 0.801186210305202, + "grad_norm": 1.051985157077811, + "learning_rate": 1.4013803520340328e-05, + "loss": 0.26962924003601074, + "step": 1621 + }, + { + "epoch": 0.8016804645990362, + "grad_norm": 1.119736165525956, + "learning_rate": 1.4006318642429194e-05, + "loss": 
0.32106393575668335, + "step": 1622 + }, + { + "epoch": 0.8021747188928704, + "grad_norm": 1.1215264874092639, + "learning_rate": 1.399883108995698e-05, + "loss": 0.33063358068466187, + "step": 1623 + }, + { + "epoch": 0.8026689731867046, + "grad_norm": 1.2875541426354853, + "learning_rate": 1.3991340867922266e-05, + "loss": 0.31906163692474365, + "step": 1624 + }, + { + "epoch": 0.8031632274805387, + "grad_norm": 1.0397829646035845, + "learning_rate": 1.3983847981325415e-05, + "loss": 0.2601381242275238, + "step": 1625 + }, + { + "epoch": 0.8036574817743729, + "grad_norm": 1.1557585059548563, + "learning_rate": 1.3976352435168577e-05, + "loss": 0.3342537581920624, + "step": 1626 + }, + { + "epoch": 0.8041517360682071, + "grad_norm": 1.2564737583224261, + "learning_rate": 1.3968854234455669e-05, + "loss": 0.3372059166431427, + "step": 1627 + }, + { + "epoch": 0.8046459903620413, + "grad_norm": 1.1676806235835944, + "learning_rate": 1.3961353384192377e-05, + "loss": 0.31026744842529297, + "step": 1628 + }, + { + "epoch": 0.8051402446558754, + "grad_norm": 1.0921501695742, + "learning_rate": 1.3953849889386173e-05, + "loss": 0.2867652177810669, + "step": 1629 + }, + { + "epoch": 0.8056344989497096, + "grad_norm": 1.1055169200249502, + "learning_rate": 1.3946343755046274e-05, + "loss": 0.29169392585754395, + "step": 1630 + }, + { + "epoch": 0.8061287532435438, + "grad_norm": 1.0753220774925722, + "learning_rate": 1.393883498618367e-05, + "loss": 0.2976510524749756, + "step": 1631 + }, + { + "epoch": 0.806623007537378, + "grad_norm": 1.1387290098549956, + "learning_rate": 1.3931323587811107e-05, + "loss": 0.2900371551513672, + "step": 1632 + }, + { + "epoch": 0.8071172618312121, + "grad_norm": 1.0560549112494348, + "learning_rate": 1.3923809564943093e-05, + "loss": 0.31660354137420654, + "step": 1633 + }, + { + "epoch": 0.8076115161250463, + "grad_norm": 1.079892158607702, + "learning_rate": 1.3916292922595875e-05, + "loss": 0.3099827468395233, + "step": 1634 + }, + { 
+ "epoch": 0.8081057704188805, + "grad_norm": 1.1450154190444473, + "learning_rate": 1.3908773665787459e-05, + "loss": 0.34322571754455566, + "step": 1635 + }, + { + "epoch": 0.8086000247127146, + "grad_norm": 1.0812992610334402, + "learning_rate": 1.3901251799537592e-05, + "loss": 0.2780989408493042, + "step": 1636 + }, + { + "epoch": 0.8090942790065488, + "grad_norm": 1.1023331343203706, + "learning_rate": 1.389372732886777e-05, + "loss": 0.31049463152885437, + "step": 1637 + }, + { + "epoch": 0.809588533300383, + "grad_norm": 1.0442513462466116, + "learning_rate": 1.3886200258801213e-05, + "loss": 0.29925107955932617, + "step": 1638 + }, + { + "epoch": 0.8100827875942173, + "grad_norm": 1.1039524368767084, + "learning_rate": 1.3878670594362893e-05, + "loss": 0.31893983483314514, + "step": 1639 + }, + { + "epoch": 0.8105770418880514, + "grad_norm": 1.1529184850949745, + "learning_rate": 1.3871138340579502e-05, + "loss": 0.31307080388069153, + "step": 1640 + }, + { + "epoch": 0.8110712961818856, + "grad_norm": 1.068880489111062, + "learning_rate": 1.3863603502479465e-05, + "loss": 0.28198909759521484, + "step": 1641 + }, + { + "epoch": 0.8115655504757198, + "grad_norm": 1.085975037148026, + "learning_rate": 1.3856066085092936e-05, + "loss": 0.28937461972236633, + "step": 1642 + }, + { + "epoch": 0.812059804769554, + "grad_norm": 1.2145338549731968, + "learning_rate": 1.3848526093451789e-05, + "loss": 0.32332292199134827, + "step": 1643 + }, + { + "epoch": 0.8125540590633881, + "grad_norm": 1.1174132019487801, + "learning_rate": 1.3840983532589606e-05, + "loss": 0.3059847056865692, + "step": 1644 + }, + { + "epoch": 0.8130483133572223, + "grad_norm": 1.0961695985122493, + "learning_rate": 1.3833438407541698e-05, + "loss": 0.2939583957195282, + "step": 1645 + }, + { + "epoch": 0.8135425676510565, + "grad_norm": 1.098340825845408, + "learning_rate": 1.3825890723345082e-05, + "loss": 0.3293933868408203, + "step": 1646 + }, + { + "epoch": 0.8140368219448907, + 
"grad_norm": 1.230371641918686, + "learning_rate": 1.3818340485038488e-05, + "loss": 0.33373600244522095, + "step": 1647 + }, + { + "epoch": 0.8145310762387248, + "grad_norm": 1.0438422749127716, + "learning_rate": 1.3810787697662337e-05, + "loss": 0.2716716527938843, + "step": 1648 + }, + { + "epoch": 0.815025330532559, + "grad_norm": 1.0698510483790238, + "learning_rate": 1.3803232366258774e-05, + "loss": 0.26109835505485535, + "step": 1649 + }, + { + "epoch": 0.8155195848263932, + "grad_norm": 1.1399890180248013, + "learning_rate": 1.3795674495871627e-05, + "loss": 0.3161536753177643, + "step": 1650 + }, + { + "epoch": 0.8160138391202274, + "grad_norm": 1.0602857079391073, + "learning_rate": 1.3788114091546414e-05, + "loss": 0.3078432083129883, + "step": 1651 + }, + { + "epoch": 0.8165080934140615, + "grad_norm": 1.2174185154702881, + "learning_rate": 1.3780551158330364e-05, + "loss": 0.31023627519607544, + "step": 1652 + }, + { + "epoch": 0.8170023477078957, + "grad_norm": 1.2616374354619766, + "learning_rate": 1.3772985701272374e-05, + "loss": 0.3438849151134491, + "step": 1653 + }, + { + "epoch": 0.8174966020017299, + "grad_norm": 1.178467386446937, + "learning_rate": 1.376541772542304e-05, + "loss": 0.31897789239883423, + "step": 1654 + }, + { + "epoch": 0.8179908562955641, + "grad_norm": 1.1125307511503921, + "learning_rate": 1.3757847235834636e-05, + "loss": 0.3101171553134918, + "step": 1655 + }, + { + "epoch": 0.8184851105893982, + "grad_norm": 1.0084998738545823, + "learning_rate": 1.375027423756111e-05, + "loss": 0.28926995396614075, + "step": 1656 + }, + { + "epoch": 0.8189793648832324, + "grad_norm": 1.0711136459333532, + "learning_rate": 1.3742698735658087e-05, + "loss": 0.322610080242157, + "step": 1657 + }, + { + "epoch": 0.8194736191770666, + "grad_norm": 1.272869487692088, + "learning_rate": 1.3735120735182865e-05, + "loss": 0.27430039644241333, + "step": 1658 + }, + { + "epoch": 0.8199678734709008, + "grad_norm": 1.217768709412782, + 
"learning_rate": 1.3727540241194408e-05, + "loss": 0.3091571629047394, + "step": 1659 + }, + { + "epoch": 0.8204621277647349, + "grad_norm": 1.1956758026057746, + "learning_rate": 1.3719957258753347e-05, + "loss": 0.3039378523826599, + "step": 1660 + }, + { + "epoch": 0.8209563820585691, + "grad_norm": 1.0829759157920493, + "learning_rate": 1.371237179292197e-05, + "loss": 0.29711851477622986, + "step": 1661 + }, + { + "epoch": 0.8214506363524033, + "grad_norm": 1.238383962420106, + "learning_rate": 1.370478384876423e-05, + "loss": 0.32411956787109375, + "step": 1662 + }, + { + "epoch": 0.8219448906462374, + "grad_norm": 1.124344248809279, + "learning_rate": 1.3697193431345725e-05, + "loss": 0.2981719672679901, + "step": 1663 + }, + { + "epoch": 0.8224391449400716, + "grad_norm": 1.1755056696925432, + "learning_rate": 1.3689600545733713e-05, + "loss": 0.32756730914115906, + "step": 1664 + }, + { + "epoch": 0.8229333992339058, + "grad_norm": 1.2289352827455349, + "learning_rate": 1.3682005196997094e-05, + "loss": 0.3910979628562927, + "step": 1665 + }, + { + "epoch": 0.82342765352774, + "grad_norm": 1.1421419999727185, + "learning_rate": 1.3674407390206417e-05, + "loss": 0.31716856360435486, + "step": 1666 + }, + { + "epoch": 0.8239219078215742, + "grad_norm": 1.087053372594853, + "learning_rate": 1.3666807130433865e-05, + "loss": 0.31816208362579346, + "step": 1667 + }, + { + "epoch": 0.8244161621154084, + "grad_norm": 1.075967107077939, + "learning_rate": 1.3659204422753265e-05, + "loss": 0.3008955121040344, + "step": 1668 + }, + { + "epoch": 0.8249104164092426, + "grad_norm": 1.0356290376868373, + "learning_rate": 1.3651599272240078e-05, + "loss": 0.2957409918308258, + "step": 1669 + }, + { + "epoch": 0.8254046707030768, + "grad_norm": 1.0989692049502364, + "learning_rate": 1.364399168397139e-05, + "loss": 0.33019471168518066, + "step": 1670 + }, + { + "epoch": 0.8258989249969109, + "grad_norm": 1.1429937641963879, + "learning_rate": 1.3636381663025917e-05, + 
"loss": 0.3532376289367676, + "step": 1671 + }, + { + "epoch": 0.8263931792907451, + "grad_norm": 1.1063068636148639, + "learning_rate": 1.362876921448401e-05, + "loss": 0.2980180084705353, + "step": 1672 + }, + { + "epoch": 0.8268874335845793, + "grad_norm": 1.0418269417044947, + "learning_rate": 1.362115434342762e-05, + "loss": 0.27932479977607727, + "step": 1673 + }, + { + "epoch": 0.8273816878784135, + "grad_norm": 1.1782768860255097, + "learning_rate": 1.3613537054940331e-05, + "loss": 0.2783966064453125, + "step": 1674 + }, + { + "epoch": 0.8278759421722476, + "grad_norm": 1.1803795842967677, + "learning_rate": 1.3605917354107336e-05, + "loss": 0.2957308888435364, + "step": 1675 + }, + { + "epoch": 0.8283701964660818, + "grad_norm": 1.180747940998609, + "learning_rate": 1.3598295246015439e-05, + "loss": 0.31640201807022095, + "step": 1676 + }, + { + "epoch": 0.828864450759916, + "grad_norm": 1.0988354367735653, + "learning_rate": 1.3590670735753047e-05, + "loss": 0.2969709634780884, + "step": 1677 + }, + { + "epoch": 0.8293587050537502, + "grad_norm": 1.1164468460017938, + "learning_rate": 1.3583043828410177e-05, + "loss": 0.34167301654815674, + "step": 1678 + }, + { + "epoch": 0.8298529593475843, + "grad_norm": 1.0956930352290435, + "learning_rate": 1.3575414529078443e-05, + "loss": 0.28540804982185364, + "step": 1679 + }, + { + "epoch": 0.8303472136414185, + "grad_norm": 1.1795719749617215, + "learning_rate": 1.3567782842851054e-05, + "loss": 0.2962091565132141, + "step": 1680 + }, + { + "epoch": 0.8308414679352527, + "grad_norm": 1.1969039130243166, + "learning_rate": 1.3560148774822816e-05, + "loss": 0.3650284707546234, + "step": 1681 + }, + { + "epoch": 0.8313357222290869, + "grad_norm": 1.1374534594887609, + "learning_rate": 1.3552512330090126e-05, + "loss": 0.3134267330169678, + "step": 1682 + }, + { + "epoch": 0.831829976522921, + "grad_norm": 1.0921894303145987, + "learning_rate": 1.3544873513750967e-05, + "loss": 0.3020439147949219, + "step": 1683 + 
}, + { + "epoch": 0.8323242308167552, + "grad_norm": 0.9393581319245673, + "learning_rate": 1.3537232330904895e-05, + "loss": 0.25083282589912415, + "step": 1684 + }, + { + "epoch": 0.8328184851105894, + "grad_norm": 1.2024147558027563, + "learning_rate": 1.3529588786653063e-05, + "loss": 0.33875352144241333, + "step": 1685 + }, + { + "epoch": 0.8333127394044236, + "grad_norm": 1.0620839528979684, + "learning_rate": 1.3521942886098186e-05, + "loss": 0.2717735171318054, + "step": 1686 + }, + { + "epoch": 0.8338069936982577, + "grad_norm": 1.1255995988400895, + "learning_rate": 1.3514294634344562e-05, + "loss": 0.271842896938324, + "step": 1687 + }, + { + "epoch": 0.8343012479920919, + "grad_norm": 1.3262220000473801, + "learning_rate": 1.3506644036498054e-05, + "loss": 0.29420506954193115, + "step": 1688 + }, + { + "epoch": 0.8347955022859261, + "grad_norm": 1.338127401529371, + "learning_rate": 1.349899109766609e-05, + "loss": 0.3336431682109833, + "step": 1689 + }, + { + "epoch": 0.8352897565797602, + "grad_norm": 1.0514224360912943, + "learning_rate": 1.3491335822957665e-05, + "loss": 0.2848295569419861, + "step": 1690 + }, + { + "epoch": 0.8357840108735944, + "grad_norm": 1.1721842125626762, + "learning_rate": 1.3483678217483327e-05, + "loss": 0.3164542019367218, + "step": 1691 + }, + { + "epoch": 0.8362782651674286, + "grad_norm": 1.1542823329984544, + "learning_rate": 1.3476018286355189e-05, + "loss": 0.3030688762664795, + "step": 1692 + }, + { + "epoch": 0.8367725194612629, + "grad_norm": 1.3329503320081877, + "learning_rate": 1.3468356034686912e-05, + "loss": 0.30218198895454407, + "step": 1693 + }, + { + "epoch": 0.837266773755097, + "grad_norm": 1.1429497129560076, + "learning_rate": 1.3460691467593697e-05, + "loss": 0.3327499032020569, + "step": 1694 + }, + { + "epoch": 0.8377610280489312, + "grad_norm": 1.2198627663252626, + "learning_rate": 1.3453024590192307e-05, + "loss": 0.29298892617225647, + "step": 1695 + }, + { + "epoch": 0.8382552823427654, + 
"grad_norm": 1.238368209416205, + "learning_rate": 1.344535540760104e-05, + "loss": 0.3096858859062195, + "step": 1696 + }, + { + "epoch": 0.8387495366365996, + "grad_norm": 1.1297510733547198, + "learning_rate": 1.3437683924939731e-05, + "loss": 0.30680233240127563, + "step": 1697 + }, + { + "epoch": 0.8392437909304337, + "grad_norm": 1.13902422944666, + "learning_rate": 1.3430010147329752e-05, + "loss": 0.3139989972114563, + "step": 1698 + }, + { + "epoch": 0.8397380452242679, + "grad_norm": 1.132396621648215, + "learning_rate": 1.3422334079894008e-05, + "loss": 0.30418652296066284, + "step": 1699 + }, + { + "epoch": 0.8402322995181021, + "grad_norm": 1.228592620621731, + "learning_rate": 1.3414655727756931e-05, + "loss": 0.31245100498199463, + "step": 1700 + }, + { + "epoch": 0.8407265538119363, + "grad_norm": 1.1908375195801162, + "learning_rate": 1.3406975096044477e-05, + "loss": 0.3381880223751068, + "step": 1701 + }, + { + "epoch": 0.8412208081057704, + "grad_norm": 1.2009611203254438, + "learning_rate": 1.3399292189884135e-05, + "loss": 0.3359968960285187, + "step": 1702 + }, + { + "epoch": 0.8417150623996046, + "grad_norm": 1.2698512981575327, + "learning_rate": 1.3391607014404891e-05, + "loss": 0.3320350646972656, + "step": 1703 + }, + { + "epoch": 0.8422093166934388, + "grad_norm": 1.1615181813433448, + "learning_rate": 1.3383919574737267e-05, + "loss": 0.32830795645713806, + "step": 1704 + }, + { + "epoch": 0.842703570987273, + "grad_norm": 0.9808785682252426, + "learning_rate": 1.3376229876013285e-05, + "loss": 0.255840927362442, + "step": 1705 + }, + { + "epoch": 0.8431978252811071, + "grad_norm": 1.0739012833500008, + "learning_rate": 1.3368537923366476e-05, + "loss": 0.3110755681991577, + "step": 1706 + }, + { + "epoch": 0.8436920795749413, + "grad_norm": 1.0815136095330147, + "learning_rate": 1.336084372193188e-05, + "loss": 0.28063881397247314, + "step": 1707 + }, + { + "epoch": 0.8441863338687755, + "grad_norm": 1.1539434345644544, + 
"learning_rate": 1.3353147276846042e-05, + "loss": 0.31297358870506287, + "step": 1708 + }, + { + "epoch": 0.8446805881626097, + "grad_norm": 1.155638509555895, + "learning_rate": 1.3345448593246986e-05, + "loss": 0.30750149488449097, + "step": 1709 + }, + { + "epoch": 0.8451748424564438, + "grad_norm": 1.0259778822912606, + "learning_rate": 1.333774767627425e-05, + "loss": 0.2665224075317383, + "step": 1710 + }, + { + "epoch": 0.845669096750278, + "grad_norm": 1.0618832452009934, + "learning_rate": 1.3330044531068858e-05, + "loss": 0.28920280933380127, + "step": 1711 + }, + { + "epoch": 0.8461633510441122, + "grad_norm": 1.0688762844449171, + "learning_rate": 1.332233916277332e-05, + "loss": 0.2678643465042114, + "step": 1712 + }, + { + "epoch": 0.8466576053379464, + "grad_norm": 1.1389370638959122, + "learning_rate": 1.3314631576531623e-05, + "loss": 0.33682242035865784, + "step": 1713 + }, + { + "epoch": 0.8471518596317805, + "grad_norm": 1.2088936099945806, + "learning_rate": 1.330692177748925e-05, + "loss": 0.36704546213150024, + "step": 1714 + }, + { + "epoch": 0.8476461139256147, + "grad_norm": 1.0972613113130176, + "learning_rate": 1.3299209770793144e-05, + "loss": 0.3183630108833313, + "step": 1715 + }, + { + "epoch": 0.8481403682194489, + "grad_norm": 1.0799352919589156, + "learning_rate": 1.3291495561591736e-05, + "loss": 0.27138596773147583, + "step": 1716 + }, + { + "epoch": 0.848634622513283, + "grad_norm": 1.1332588592044, + "learning_rate": 1.3283779155034925e-05, + "loss": 0.30252328515052795, + "step": 1717 + }, + { + "epoch": 0.8491288768071172, + "grad_norm": 1.1212549613542353, + "learning_rate": 1.3276060556274067e-05, + "loss": 0.29494598507881165, + "step": 1718 + }, + { + "epoch": 0.8496231311009514, + "grad_norm": 1.01148770717553, + "learning_rate": 1.3268339770461988e-05, + "loss": 0.2822422981262207, + "step": 1719 + }, + { + "epoch": 0.8501173853947857, + "grad_norm": 1.134036508201843, + "learning_rate": 1.3260616802752979e-05, + 
"loss": 0.3348005712032318, + "step": 1720 + }, + { + "epoch": 0.8506116396886197, + "grad_norm": 1.171053745899539, + "learning_rate": 1.3252891658302782e-05, + "loss": 0.3146229088306427, + "step": 1721 + }, + { + "epoch": 0.851105893982454, + "grad_norm": 1.1635384669674214, + "learning_rate": 1.3245164342268592e-05, + "loss": 0.34189414978027344, + "step": 1722 + }, + { + "epoch": 0.8516001482762882, + "grad_norm": 1.0403207041973201, + "learning_rate": 1.3237434859809055e-05, + "loss": 0.2967323958873749, + "step": 1723 + }, + { + "epoch": 0.8520944025701224, + "grad_norm": 1.1011411329678815, + "learning_rate": 1.3229703216084262e-05, + "loss": 0.329689085483551, + "step": 1724 + }, + { + "epoch": 0.8525886568639565, + "grad_norm": 1.1910259713127598, + "learning_rate": 1.3221969416255751e-05, + "loss": 0.33041107654571533, + "step": 1725 + }, + { + "epoch": 0.8530829111577907, + "grad_norm": 1.144468406694428, + "learning_rate": 1.321423346548649e-05, + "loss": 0.30197203159332275, + "step": 1726 + }, + { + "epoch": 0.8535771654516249, + "grad_norm": 1.1709857904248526, + "learning_rate": 1.3206495368940897e-05, + "loss": 0.29060906171798706, + "step": 1727 + }, + { + "epoch": 0.8540714197454591, + "grad_norm": 1.1769143322358042, + "learning_rate": 1.3198755131784808e-05, + "loss": 0.3119436502456665, + "step": 1728 + }, + { + "epoch": 0.8545656740392932, + "grad_norm": 1.1825299188260439, + "learning_rate": 1.31910127591855e-05, + "loss": 0.35256415605545044, + "step": 1729 + }, + { + "epoch": 0.8550599283331274, + "grad_norm": 1.169751710502227, + "learning_rate": 1.3183268256311665e-05, + "loss": 0.3093785345554352, + "step": 1730 + }, + { + "epoch": 0.8555541826269616, + "grad_norm": 1.0555303314758304, + "learning_rate": 1.317552162833343e-05, + "loss": 0.2713086009025574, + "step": 1731 + }, + { + "epoch": 0.8560484369207958, + "grad_norm": 1.1667835049569328, + "learning_rate": 1.3167772880422325e-05, + "loss": 0.3135699927806854, + "step": 1732 + }, 
+ { + "epoch": 0.8565426912146299, + "grad_norm": 1.2127716623193672, + "learning_rate": 1.3160022017751308e-05, + "loss": 0.3077283501625061, + "step": 1733 + }, + { + "epoch": 0.8570369455084641, + "grad_norm": 1.0914461784602205, + "learning_rate": 1.3152269045494744e-05, + "loss": 0.2900918424129486, + "step": 1734 + }, + { + "epoch": 0.8575311998022983, + "grad_norm": 1.1010374385853228, + "learning_rate": 1.3144513968828406e-05, + "loss": 0.30828869342803955, + "step": 1735 + }, + { + "epoch": 0.8580254540961325, + "grad_norm": 1.2038482894608615, + "learning_rate": 1.3136756792929469e-05, + "loss": 0.32526400685310364, + "step": 1736 + }, + { + "epoch": 0.8585197083899666, + "grad_norm": 1.2033734524328428, + "learning_rate": 1.3128997522976518e-05, + "loss": 0.35023608803749084, + "step": 1737 + }, + { + "epoch": 0.8590139626838008, + "grad_norm": 1.0100870731750684, + "learning_rate": 1.312123616414953e-05, + "loss": 0.27287641167640686, + "step": 1738 + }, + { + "epoch": 0.859508216977635, + "grad_norm": 1.1797907328737691, + "learning_rate": 1.3113472721629871e-05, + "loss": 0.346009761095047, + "step": 1739 + }, + { + "epoch": 0.8600024712714691, + "grad_norm": 1.0724791595798373, + "learning_rate": 1.3105707200600312e-05, + "loss": 0.3297504186630249, + "step": 1740 + }, + { + "epoch": 0.8604967255653033, + "grad_norm": 1.1244989642514696, + "learning_rate": 1.3097939606245005e-05, + "loss": 0.29835087060928345, + "step": 1741 + }, + { + "epoch": 0.8609909798591375, + "grad_norm": 1.1715549927893771, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.31466037034988403, + "step": 1742 + }, + { + "epoch": 0.8614852341529717, + "grad_norm": 1.1357283105102867, + "learning_rate": 1.3082398218300646e-05, + "loss": 0.32722294330596924, + "step": 1743 + }, + { + "epoch": 0.8619794884468058, + "grad_norm": 1.0679826147860039, + "learning_rate": 1.3074624435086809e-05, + "loss": 0.2603963613510132, + "step": 1744 + }, + { + "epoch": 0.86247374274064, + 
"grad_norm": 1.3274641459704042, + "learning_rate": 1.3066848599297633e-05, + "loss": 0.3100607991218567, + "step": 1745 + }, + { + "epoch": 0.8629679970344742, + "grad_norm": 1.0941880035602098, + "learning_rate": 1.3059070716124145e-05, + "loss": 0.2772334814071655, + "step": 1746 + }, + { + "epoch": 0.8634622513283084, + "grad_norm": 1.0597299652706509, + "learning_rate": 1.305129079075876e-05, + "loss": 0.3097267746925354, + "step": 1747 + }, + { + "epoch": 0.8639565056221425, + "grad_norm": 0.9828148026871187, + "learning_rate": 1.304350882839524e-05, + "loss": 0.24734097719192505, + "step": 1748 + }, + { + "epoch": 0.8644507599159768, + "grad_norm": 1.1998311484351691, + "learning_rate": 1.3035724834228713e-05, + "loss": 0.32148587703704834, + "step": 1749 + }, + { + "epoch": 0.864945014209811, + "grad_norm": 1.0741747588028856, + "learning_rate": 1.3027938813455663e-05, + "loss": 0.3037404417991638, + "step": 1750 + }, + { + "epoch": 0.8654392685036452, + "grad_norm": 1.011225470292042, + "learning_rate": 1.3020150771273925e-05, + "loss": 0.30760154128074646, + "step": 1751 + }, + { + "epoch": 0.8659335227974793, + "grad_norm": 1.2184140511087935, + "learning_rate": 1.3012360712882681e-05, + "loss": 0.3169519305229187, + "step": 1752 + }, + { + "epoch": 0.8664277770913135, + "grad_norm": 1.1637013896789081, + "learning_rate": 1.300456864348247e-05, + "loss": 0.32497861981391907, + "step": 1753 + }, + { + "epoch": 0.8669220313851477, + "grad_norm": 1.1775374726585146, + "learning_rate": 1.2996774568275163e-05, + "loss": 0.3318047821521759, + "step": 1754 + }, + { + "epoch": 0.8674162856789819, + "grad_norm": 1.1616476146855954, + "learning_rate": 1.298897849246397e-05, + "loss": 0.32553863525390625, + "step": 1755 + }, + { + "epoch": 0.867910539972816, + "grad_norm": 1.2503555115463478, + "learning_rate": 1.2981180421253446e-05, + "loss": 0.36457520723342896, + "step": 1756 + }, + { + "epoch": 0.8684047942666502, + "grad_norm": 1.169094604095011, + 
"learning_rate": 1.2973380359849466e-05, + "loss": 0.3038361668586731, + "step": 1757 + }, + { + "epoch": 0.8688990485604844, + "grad_norm": 1.1027242700855706, + "learning_rate": 1.2965578313459246e-05, + "loss": 0.3219846785068512, + "step": 1758 + }, + { + "epoch": 0.8693933028543186, + "grad_norm": 1.1142620036406827, + "learning_rate": 1.2957774287291311e-05, + "loss": 0.3180781304836273, + "step": 1759 + }, + { + "epoch": 0.8698875571481527, + "grad_norm": 1.0882143758359024, + "learning_rate": 1.2949968286555527e-05, + "loss": 0.27302947640419006, + "step": 1760 + }, + { + "epoch": 0.8703818114419869, + "grad_norm": 1.1731384509972838, + "learning_rate": 1.2942160316463066e-05, + "loss": 0.31756314635276794, + "step": 1761 + }, + { + "epoch": 0.8708760657358211, + "grad_norm": 1.1248252560155063, + "learning_rate": 1.2934350382226412e-05, + "loss": 0.2921680510044098, + "step": 1762 + }, + { + "epoch": 0.8713703200296553, + "grad_norm": 1.2402414216219324, + "learning_rate": 1.2926538489059373e-05, + "loss": 0.36426985263824463, + "step": 1763 + }, + { + "epoch": 0.8718645743234894, + "grad_norm": 1.1314972333554063, + "learning_rate": 1.2918724642177054e-05, + "loss": 0.31873831152915955, + "step": 1764 + }, + { + "epoch": 0.8723588286173236, + "grad_norm": 1.0875738556359984, + "learning_rate": 1.2910908846795867e-05, + "loss": 0.30952733755111694, + "step": 1765 + }, + { + "epoch": 0.8728530829111578, + "grad_norm": 1.149047421683754, + "learning_rate": 1.2903091108133523e-05, + "loss": 0.33339035511016846, + "step": 1766 + }, + { + "epoch": 0.8733473372049919, + "grad_norm": 1.1272534729456736, + "learning_rate": 1.2895271431409038e-05, + "loss": 0.31531351804733276, + "step": 1767 + }, + { + "epoch": 0.8738415914988261, + "grad_norm": 1.0832848102714157, + "learning_rate": 1.2887449821842713e-05, + "loss": 0.3016526401042938, + "step": 1768 + }, + { + "epoch": 0.8743358457926603, + "grad_norm": 1.1203275148695215, + "learning_rate": 
1.2879626284656141e-05, + "loss": 0.3364630341529846, + "step": 1769 + }, + { + "epoch": 0.8748301000864945, + "grad_norm": 1.049317791331816, + "learning_rate": 1.287180082507221e-05, + "loss": 0.29755398631095886, + "step": 1770 + }, + { + "epoch": 0.8753243543803286, + "grad_norm": 1.1616312337400467, + "learning_rate": 1.286397344831508e-05, + "loss": 0.2986103892326355, + "step": 1771 + }, + { + "epoch": 0.8758186086741628, + "grad_norm": 1.0876002245947722, + "learning_rate": 1.2856144159610197e-05, + "loss": 0.31291434168815613, + "step": 1772 + }, + { + "epoch": 0.876312862967997, + "grad_norm": 1.0995747953439883, + "learning_rate": 1.2848312964184283e-05, + "loss": 0.28285568952560425, + "step": 1773 + }, + { + "epoch": 0.8768071172618312, + "grad_norm": 1.1726581514839194, + "learning_rate": 1.2840479867265331e-05, + "loss": 0.3319891095161438, + "step": 1774 + }, + { + "epoch": 0.8773013715556653, + "grad_norm": 1.1459648615093148, + "learning_rate": 1.2832644874082604e-05, + "loss": 0.3265117406845093, + "step": 1775 + }, + { + "epoch": 0.8777956258494996, + "grad_norm": 1.1247446577558389, + "learning_rate": 1.2824807989866635e-05, + "loss": 0.32061511278152466, + "step": 1776 + }, + { + "epoch": 0.8782898801433338, + "grad_norm": 1.2889890707472673, + "learning_rate": 1.2816969219849214e-05, + "loss": 0.34278666973114014, + "step": 1777 + }, + { + "epoch": 0.878784134437168, + "grad_norm": 1.0560963427574246, + "learning_rate": 1.2809128569263387e-05, + "loss": 0.28335195779800415, + "step": 1778 + }, + { + "epoch": 0.8792783887310021, + "grad_norm": 1.261751311219404, + "learning_rate": 1.2801286043343468e-05, + "loss": 0.35037046670913696, + "step": 1779 + }, + { + "epoch": 0.8797726430248363, + "grad_norm": 1.2110241423671546, + "learning_rate": 1.2793441647325012e-05, + "loss": 0.30058878660202026, + "step": 1780 + }, + { + "epoch": 0.8802668973186705, + "grad_norm": 1.1234244113929972, + "learning_rate": 1.2785595386444824e-05, + "loss": 
0.29526466131210327, + "step": 1781 + }, + { + "epoch": 0.8807611516125047, + "grad_norm": 1.128737037655087, + "learning_rate": 1.2777747265940956e-05, + "loss": 0.3194332718849182, + "step": 1782 + }, + { + "epoch": 0.8812554059063388, + "grad_norm": 1.1751557862784823, + "learning_rate": 1.2769897291052709e-05, + "loss": 0.33527326583862305, + "step": 1783 + }, + { + "epoch": 0.881749660200173, + "grad_norm": 1.1396387575620477, + "learning_rate": 1.2762045467020601e-05, + "loss": 0.3277815580368042, + "step": 1784 + }, + { + "epoch": 0.8822439144940072, + "grad_norm": 1.2472488401817894, + "learning_rate": 1.2754191799086406e-05, + "loss": 0.31030380725860596, + "step": 1785 + }, + { + "epoch": 0.8827381687878414, + "grad_norm": 1.2316289072611675, + "learning_rate": 1.274633629249312e-05, + "loss": 0.34496408700942993, + "step": 1786 + }, + { + "epoch": 0.8832324230816755, + "grad_norm": 1.1907817971144352, + "learning_rate": 1.2738478952484964e-05, + "loss": 0.31008201837539673, + "step": 1787 + }, + { + "epoch": 0.8837266773755097, + "grad_norm": 1.1874449425538405, + "learning_rate": 1.2730619784307388e-05, + "loss": 0.35956043004989624, + "step": 1788 + }, + { + "epoch": 0.8842209316693439, + "grad_norm": 1.075617061625236, + "learning_rate": 1.272275879320706e-05, + "loss": 0.2944573760032654, + "step": 1789 + }, + { + "epoch": 0.8847151859631781, + "grad_norm": 1.0739187183942678, + "learning_rate": 1.2714895984431863e-05, + "loss": 0.2941366136074066, + "step": 1790 + }, + { + "epoch": 0.8852094402570122, + "grad_norm": 1.3130838842625934, + "learning_rate": 1.2707031363230901e-05, + "loss": 0.34683144092559814, + "step": 1791 + }, + { + "epoch": 0.8857036945508464, + "grad_norm": 1.1309361657268096, + "learning_rate": 1.2699164934854475e-05, + "loss": 0.3014514744281769, + "step": 1792 + }, + { + "epoch": 0.8861979488446806, + "grad_norm": 1.1642635843186193, + "learning_rate": 1.2691296704554112e-05, + "loss": 0.2749955654144287, + "step": 1793 + }, + 
{ + "epoch": 0.8866922031385147, + "grad_norm": 1.2406193113190336, + "learning_rate": 1.2683426677582518e-05, + "loss": 0.3707960844039917, + "step": 1794 + }, + { + "epoch": 0.8871864574323489, + "grad_norm": 1.098057655891237, + "learning_rate": 1.2675554859193615e-05, + "loss": 0.3122541606426239, + "step": 1795 + }, + { + "epoch": 0.8876807117261831, + "grad_norm": 1.1564617646628, + "learning_rate": 1.2667681254642521e-05, + "loss": 0.3072753846645355, + "step": 1796 + }, + { + "epoch": 0.8881749660200173, + "grad_norm": 1.1000251936377918, + "learning_rate": 1.2659805869185534e-05, + "loss": 0.27002331614494324, + "step": 1797 + }, + { + "epoch": 0.8886692203138514, + "grad_norm": 1.0649961261949041, + "learning_rate": 1.2651928708080155e-05, + "loss": 0.2775167226791382, + "step": 1798 + }, + { + "epoch": 0.8891634746076856, + "grad_norm": 1.0134446617324497, + "learning_rate": 1.2644049776585061e-05, + "loss": 0.30023425817489624, + "step": 1799 + }, + { + "epoch": 0.8896577289015198, + "grad_norm": 1.0536326288458973, + "learning_rate": 1.2636169079960116e-05, + "loss": 0.29491451382637024, + "step": 1800 + }, + { + "epoch": 0.890151983195354, + "grad_norm": 1.1393442237009457, + "learning_rate": 1.2628286623466359e-05, + "loss": 0.3069722652435303, + "step": 1801 + }, + { + "epoch": 0.8906462374891881, + "grad_norm": 1.0432479678380786, + "learning_rate": 1.2620402412366006e-05, + "loss": 0.30594444274902344, + "step": 1802 + }, + { + "epoch": 0.8911404917830223, + "grad_norm": 1.224543789313884, + "learning_rate": 1.2612516451922442e-05, + "loss": 0.278346985578537, + "step": 1803 + }, + { + "epoch": 0.8916347460768566, + "grad_norm": 1.2157715105375322, + "learning_rate": 1.2604628747400227e-05, + "loss": 0.2985970973968506, + "step": 1804 + }, + { + "epoch": 0.8921290003706908, + "grad_norm": 1.1226040901686842, + "learning_rate": 1.259673930406507e-05, + "loss": 0.31054627895355225, + "step": 1805 + }, + { + "epoch": 0.8926232546645249, + 
"grad_norm": 1.1289150487077437, + "learning_rate": 1.258884812718386e-05, + "loss": 0.28903907537460327, + "step": 1806 + }, + { + "epoch": 0.8931175089583591, + "grad_norm": 1.132448586420589, + "learning_rate": 1.258095522202463e-05, + "loss": 0.2937915027141571, + "step": 1807 + }, + { + "epoch": 0.8936117632521933, + "grad_norm": 1.251676196860064, + "learning_rate": 1.257306059385657e-05, + "loss": 0.3038950562477112, + "step": 1808 + }, + { + "epoch": 0.8941060175460275, + "grad_norm": 1.235431629529867, + "learning_rate": 1.2565164247950023e-05, + "loss": 0.3081057071685791, + "step": 1809 + }, + { + "epoch": 0.8946002718398616, + "grad_norm": 1.1023391071403523, + "learning_rate": 1.2557266189576478e-05, + "loss": 0.2608702480792999, + "step": 1810 + }, + { + "epoch": 0.8950945261336958, + "grad_norm": 1.1733196648921136, + "learning_rate": 1.254936642400856e-05, + "loss": 0.2829548120498657, + "step": 1811 + }, + { + "epoch": 0.89558878042753, + "grad_norm": 1.1986500261226571, + "learning_rate": 1.2541464956520045e-05, + "loss": 0.3157985508441925, + "step": 1812 + }, + { + "epoch": 0.8960830347213642, + "grad_norm": 1.1805347109907394, + "learning_rate": 1.2533561792385837e-05, + "loss": 0.2948974370956421, + "step": 1813 + }, + { + "epoch": 0.8965772890151983, + "grad_norm": 1.1460952132203441, + "learning_rate": 1.252565693688198e-05, + "loss": 0.3011903166770935, + "step": 1814 + }, + { + "epoch": 0.8970715433090325, + "grad_norm": 1.3055245186221631, + "learning_rate": 1.2517750395285635e-05, + "loss": 0.3570353388786316, + "step": 1815 + }, + { + "epoch": 0.8975657976028667, + "grad_norm": 1.1337741379781219, + "learning_rate": 1.2509842172875105e-05, + "loss": 0.30166712403297424, + "step": 1816 + }, + { + "epoch": 0.8980600518967009, + "grad_norm": 1.104423129790351, + "learning_rate": 1.2501932274929797e-05, + "loss": 0.3260636329650879, + "step": 1817 + }, + { + "epoch": 0.898554306190535, + "grad_norm": 1.0975906386988825, + "learning_rate": 
1.2494020706730251e-05, + "loss": 0.31647035479545593, + "step": 1818 + }, + { + "epoch": 0.8990485604843692, + "grad_norm": 1.2099925292750648, + "learning_rate": 1.2486107473558118e-05, + "loss": 0.3059273064136505, + "step": 1819 + }, + { + "epoch": 0.8995428147782034, + "grad_norm": 1.1238527206258473, + "learning_rate": 1.247819258069616e-05, + "loss": 0.31050577759742737, + "step": 1820 + }, + { + "epoch": 0.9000370690720375, + "grad_norm": 1.167261413544568, + "learning_rate": 1.2470276033428241e-05, + "loss": 0.3199779689311981, + "step": 1821 + }, + { + "epoch": 0.9005313233658717, + "grad_norm": 1.1634621252313533, + "learning_rate": 1.2462357837039338e-05, + "loss": 0.31346091628074646, + "step": 1822 + }, + { + "epoch": 0.9010255776597059, + "grad_norm": 1.7712393639688087, + "learning_rate": 1.245443799681553e-05, + "loss": 0.31128326058387756, + "step": 1823 + }, + { + "epoch": 0.9015198319535401, + "grad_norm": 1.0665988205220116, + "learning_rate": 1.244651651804398e-05, + "loss": 0.27540329098701477, + "step": 1824 + }, + { + "epoch": 0.9020140862473742, + "grad_norm": 1.08908725997666, + "learning_rate": 1.243859340601296e-05, + "loss": 0.2613363265991211, + "step": 1825 + }, + { + "epoch": 0.9025083405412084, + "grad_norm": 1.1499718586586674, + "learning_rate": 1.2430668666011825e-05, + "loss": 0.30530184507369995, + "step": 1826 + }, + { + "epoch": 0.9030025948350426, + "grad_norm": 1.0907140946424856, + "learning_rate": 1.2422742303331022e-05, + "loss": 0.3223349153995514, + "step": 1827 + }, + { + "epoch": 0.9034968491288768, + "grad_norm": 1.131086049145241, + "learning_rate": 1.2414814323262067e-05, + "loss": 0.32017287611961365, + "step": 1828 + }, + { + "epoch": 0.9039911034227109, + "grad_norm": 1.2183101338845472, + "learning_rate": 1.2406884731097582e-05, + "loss": 0.2965891361236572, + "step": 1829 + }, + { + "epoch": 0.9044853577165451, + "grad_norm": 1.535326476461108, + "learning_rate": 1.2398953532131235e-05, + "loss": 
0.3517727851867676, + "step": 1830 + }, + { + "epoch": 0.9049796120103794, + "grad_norm": 1.0055415215772612, + "learning_rate": 1.2391020731657788e-05, + "loss": 0.26107311248779297, + "step": 1831 + }, + { + "epoch": 0.9054738663042136, + "grad_norm": 1.16405975535122, + "learning_rate": 1.2383086334973065e-05, + "loss": 0.31327998638153076, + "step": 1832 + }, + { + "epoch": 0.9059681205980477, + "grad_norm": 1.1376729658041929, + "learning_rate": 1.2375150347373956e-05, + "loss": 0.2708127498626709, + "step": 1833 + }, + { + "epoch": 0.9064623748918819, + "grad_norm": 1.2578266997569258, + "learning_rate": 1.236721277415841e-05, + "loss": 0.3264025151729584, + "step": 1834 + }, + { + "epoch": 0.9069566291857161, + "grad_norm": 1.1552886471917594, + "learning_rate": 1.2359273620625438e-05, + "loss": 0.3226723074913025, + "step": 1835 + }, + { + "epoch": 0.9074508834795503, + "grad_norm": 1.095230882373492, + "learning_rate": 1.2351332892075109e-05, + "loss": 0.2895771861076355, + "step": 1836 + }, + { + "epoch": 0.9079451377733844, + "grad_norm": 1.149733162695983, + "learning_rate": 1.234339059380854e-05, + "loss": 0.3316076397895813, + "step": 1837 + }, + { + "epoch": 0.9084393920672186, + "grad_norm": 1.10037368979265, + "learning_rate": 1.2335446731127887e-05, + "loss": 0.29858651757240295, + "step": 1838 + }, + { + "epoch": 0.9089336463610528, + "grad_norm": 1.2759313559643695, + "learning_rate": 1.2327501309336371e-05, + "loss": 0.31340792775154114, + "step": 1839 + }, + { + "epoch": 0.909427900654887, + "grad_norm": 1.038203202123546, + "learning_rate": 1.2319554333738236e-05, + "loss": 0.27344945073127747, + "step": 1840 + }, + { + "epoch": 0.9099221549487211, + "grad_norm": 1.1811761633875792, + "learning_rate": 1.2311605809638766e-05, + "loss": 0.27349725365638733, + "step": 1841 + }, + { + "epoch": 0.9104164092425553, + "grad_norm": 1.2931266398373575, + "learning_rate": 1.2303655742344292e-05, + "loss": 0.28933316469192505, + "step": 1842 + }, + { + 
"epoch": 0.9109106635363895, + "grad_norm": 1.1360201134878805, + "learning_rate": 1.2295704137162158e-05, + "loss": 0.3315466344356537, + "step": 1843 + }, + { + "epoch": 0.9114049178302237, + "grad_norm": 1.3735184410271417, + "learning_rate": 1.2287750999400743e-05, + "loss": 0.3227408528327942, + "step": 1844 + }, + { + "epoch": 0.9118991721240578, + "grad_norm": 1.1237568254849295, + "learning_rate": 1.2279796334369447e-05, + "loss": 0.30476877093315125, + "step": 1845 + }, + { + "epoch": 0.912393426417892, + "grad_norm": 1.1863082805694927, + "learning_rate": 1.2271840147378697e-05, + "loss": 0.29941046237945557, + "step": 1846 + }, + { + "epoch": 0.9128876807117262, + "grad_norm": 1.040665730868043, + "learning_rate": 1.2263882443739923e-05, + "loss": 0.26635122299194336, + "step": 1847 + }, + { + "epoch": 0.9133819350055603, + "grad_norm": 1.2009768589181191, + "learning_rate": 1.2255923228765574e-05, + "loss": 0.32384809851646423, + "step": 1848 + }, + { + "epoch": 0.9138761892993945, + "grad_norm": 1.1005403546735195, + "learning_rate": 1.2247962507769113e-05, + "loss": 0.2830178141593933, + "step": 1849 + }, + { + "epoch": 0.9143704435932287, + "grad_norm": 1.146384025635135, + "learning_rate": 1.2240000286065003e-05, + "loss": 0.32860931754112244, + "step": 1850 + }, + { + "epoch": 0.9148646978870629, + "grad_norm": 1.1448106720128721, + "learning_rate": 1.2232036568968703e-05, + "loss": 0.2820647954940796, + "step": 1851 + }, + { + "epoch": 0.915358952180897, + "grad_norm": 1.2180250787611469, + "learning_rate": 1.2224071361796685e-05, + "loss": 0.3368694484233856, + "step": 1852 + }, + { + "epoch": 0.9158532064747312, + "grad_norm": 1.1414152376911786, + "learning_rate": 1.2216104669866405e-05, + "loss": 0.32594096660614014, + "step": 1853 + }, + { + "epoch": 0.9163474607685654, + "grad_norm": 1.129839927585001, + "learning_rate": 1.2208136498496307e-05, + "loss": 0.3383556008338928, + "step": 1854 + }, + { + "epoch": 0.9168417150623996, + 
"grad_norm": 1.1516691565096748, + "learning_rate": 1.2200166853005837e-05, + "loss": 0.2655363976955414, + "step": 1855 + }, + { + "epoch": 0.9173359693562337, + "grad_norm": 1.067780593954706, + "learning_rate": 1.2192195738715414e-05, + "loss": 0.30512773990631104, + "step": 1856 + }, + { + "epoch": 0.917830223650068, + "grad_norm": 1.3304322049937938, + "learning_rate": 1.2184223160946433e-05, + "loss": 0.34026995301246643, + "step": 1857 + }, + { + "epoch": 0.9183244779439022, + "grad_norm": 1.2079696433735554, + "learning_rate": 1.2176249125021281e-05, + "loss": 0.29324328899383545, + "step": 1858 + }, + { + "epoch": 0.9188187322377364, + "grad_norm": 1.454623275441196, + "learning_rate": 1.2168273636263308e-05, + "loss": 0.3114206790924072, + "step": 1859 + }, + { + "epoch": 0.9193129865315705, + "grad_norm": 1.1301917440411622, + "learning_rate": 1.2160296699996839e-05, + "loss": 0.2829141914844513, + "step": 1860 + }, + { + "epoch": 0.9198072408254047, + "grad_norm": 1.0721269081592821, + "learning_rate": 1.2152318321547156e-05, + "loss": 0.2735600769519806, + "step": 1861 + }, + { + "epoch": 0.9203014951192389, + "grad_norm": 1.0465335380212768, + "learning_rate": 1.2144338506240519e-05, + "loss": 0.3160930573940277, + "step": 1862 + }, + { + "epoch": 0.9207957494130731, + "grad_norm": 1.0735769631967078, + "learning_rate": 1.2136357259404128e-05, + "loss": 0.26677393913269043, + "step": 1863 + }, + { + "epoch": 0.9212900037069072, + "grad_norm": 1.1305004585474958, + "learning_rate": 1.2128374586366159e-05, + "loss": 0.33033064007759094, + "step": 1864 + }, + { + "epoch": 0.9217842580007414, + "grad_norm": 1.1210908469065626, + "learning_rate": 1.2120390492455727e-05, + "loss": 0.28271663188934326, + "step": 1865 + }, + { + "epoch": 0.9222785122945756, + "grad_norm": 1.1196923913120616, + "learning_rate": 1.21124049830029e-05, + "loss": 0.3116013705730438, + "step": 1866 + }, + { + "epoch": 0.9227727665884098, + "grad_norm": 1.1258678919425735, + 
"learning_rate": 1.2104418063338686e-05, + "loss": 0.30614158511161804, + "step": 1867 + }, + { + "epoch": 0.9232670208822439, + "grad_norm": 1.2128311616527454, + "learning_rate": 1.2096429738795041e-05, + "loss": 0.34351983666419983, + "step": 1868 + }, + { + "epoch": 0.9237612751760781, + "grad_norm": 1.2814647055659063, + "learning_rate": 1.2088440014704858e-05, + "loss": 0.31006965041160583, + "step": 1869 + }, + { + "epoch": 0.9242555294699123, + "grad_norm": 1.093225958461299, + "learning_rate": 1.2080448896401964e-05, + "loss": 0.2671147584915161, + "step": 1870 + }, + { + "epoch": 0.9247497837637465, + "grad_norm": 1.2298582810409653, + "learning_rate": 1.207245638922111e-05, + "loss": 0.29123416543006897, + "step": 1871 + }, + { + "epoch": 0.9252440380575806, + "grad_norm": 1.1613532669189326, + "learning_rate": 1.2064462498497984e-05, + "loss": 0.31838539242744446, + "step": 1872 + }, + { + "epoch": 0.9257382923514148, + "grad_norm": 1.1861407153761483, + "learning_rate": 1.205646722956919e-05, + "loss": 0.3158906102180481, + "step": 1873 + }, + { + "epoch": 0.926232546645249, + "grad_norm": 1.2339017273841688, + "learning_rate": 1.2048470587772257e-05, + "loss": 0.3679552674293518, + "step": 1874 + }, + { + "epoch": 0.9267268009390831, + "grad_norm": 1.1210108605660978, + "learning_rate": 1.204047257844563e-05, + "loss": 0.2891008257865906, + "step": 1875 + }, + { + "epoch": 0.9272210552329173, + "grad_norm": 1.1110723692294957, + "learning_rate": 1.2032473206928663e-05, + "loss": 0.3207235634326935, + "step": 1876 + }, + { + "epoch": 0.9277153095267515, + "grad_norm": 1.203189154519193, + "learning_rate": 1.2024472478561624e-05, + "loss": 0.2710658311843872, + "step": 1877 + }, + { + "epoch": 0.9282095638205857, + "grad_norm": 1.1156076578026985, + "learning_rate": 1.2016470398685685e-05, + "loss": 0.2554836869239807, + "step": 1878 + }, + { + "epoch": 0.9287038181144198, + "grad_norm": 1.079454168196498, + "learning_rate": 1.2008466972642921e-05, + 
"loss": 0.2822943329811096, + "step": 1879 + }, + { + "epoch": 0.929198072408254, + "grad_norm": 1.2007950112208574, + "learning_rate": 1.20004622057763e-05, + "loss": 0.3447754681110382, + "step": 1880 + }, + { + "epoch": 0.9296923267020882, + "grad_norm": 1.1885607345269107, + "learning_rate": 1.1992456103429694e-05, + "loss": 0.3009227514266968, + "step": 1881 + }, + { + "epoch": 0.9301865809959224, + "grad_norm": 1.3491102685763696, + "learning_rate": 1.1984448670947863e-05, + "loss": 0.33154594898223877, + "step": 1882 + }, + { + "epoch": 0.9306808352897565, + "grad_norm": 1.7075348805187878, + "learning_rate": 1.1976439913676457e-05, + "loss": 0.32905343174934387, + "step": 1883 + }, + { + "epoch": 0.9311750895835907, + "grad_norm": 1.2010662669423082, + "learning_rate": 1.1968429836962e-05, + "loss": 0.34757447242736816, + "step": 1884 + }, + { + "epoch": 0.931669343877425, + "grad_norm": 1.2626693752273819, + "learning_rate": 1.1960418446151912e-05, + "loss": 0.29980987310409546, + "step": 1885 + }, + { + "epoch": 0.9321635981712592, + "grad_norm": 1.081439601568963, + "learning_rate": 1.1952405746594477e-05, + "loss": 0.3106808662414551, + "step": 1886 + }, + { + "epoch": 0.9326578524650933, + "grad_norm": 1.2465315131717423, + "learning_rate": 1.1944391743638863e-05, + "loss": 0.3222411572933197, + "step": 1887 + }, + { + "epoch": 0.9331521067589275, + "grad_norm": 1.117897007008322, + "learning_rate": 1.1936376442635104e-05, + "loss": 0.3365646302700043, + "step": 1888 + }, + { + "epoch": 0.9336463610527617, + "grad_norm": 1.2223325106102665, + "learning_rate": 1.1928359848934101e-05, + "loss": 0.32500627636909485, + "step": 1889 + }, + { + "epoch": 0.9341406153465959, + "grad_norm": 1.1692844365001853, + "learning_rate": 1.1920341967887614e-05, + "loss": 0.31395500898361206, + "step": 1890 + }, + { + "epoch": 0.93463486964043, + "grad_norm": 1.084320264091655, + "learning_rate": 1.1912322804848268e-05, + "loss": 0.3060624301433563, + "step": 1891 + }, + 
{ + "epoch": 0.9351291239342642, + "grad_norm": 1.042165685734395, + "learning_rate": 1.190430236516954e-05, + "loss": 0.2644454836845398, + "step": 1892 + }, + { + "epoch": 0.9356233782280984, + "grad_norm": 1.2086818065931575, + "learning_rate": 1.1896280654205765e-05, + "loss": 0.33404678106307983, + "step": 1893 + }, + { + "epoch": 0.9361176325219326, + "grad_norm": 1.0362894963118763, + "learning_rate": 1.1888257677312119e-05, + "loss": 0.28557512164115906, + "step": 1894 + }, + { + "epoch": 0.9366118868157667, + "grad_norm": 1.1281245501630466, + "learning_rate": 1.1880233439844623e-05, + "loss": 0.3332308530807495, + "step": 1895 + }, + { + "epoch": 0.9371061411096009, + "grad_norm": 1.0648316720915905, + "learning_rate": 1.1872207947160155e-05, + "loss": 0.3274528384208679, + "step": 1896 + }, + { + "epoch": 0.9376003954034351, + "grad_norm": 1.168900116977035, + "learning_rate": 1.1864181204616404e-05, + "loss": 0.297880083322525, + "step": 1897 + }, + { + "epoch": 0.9380946496972693, + "grad_norm": 1.1118774536365064, + "learning_rate": 1.1856153217571924e-05, + "loss": 0.3404296040534973, + "step": 1898 + }, + { + "epoch": 0.9385889039911034, + "grad_norm": 1.7308625403608067, + "learning_rate": 1.1848123991386073e-05, + "loss": 0.32343849539756775, + "step": 1899 + }, + { + "epoch": 0.9390831582849376, + "grad_norm": 1.0296882466024648, + "learning_rate": 1.1840093531419052e-05, + "loss": 0.26679158210754395, + "step": 1900 + }, + { + "epoch": 0.9395774125787718, + "grad_norm": 1.0019153721179144, + "learning_rate": 1.1832061843031884e-05, + "loss": 0.28106996417045593, + "step": 1901 + }, + { + "epoch": 0.9400716668726059, + "grad_norm": 1.1236721425678955, + "learning_rate": 1.1824028931586406e-05, + "loss": 0.28356847167015076, + "step": 1902 + }, + { + "epoch": 0.9405659211664401, + "grad_norm": 1.2443758247501144, + "learning_rate": 1.1815994802445274e-05, + "loss": 0.3256348669528961, + "step": 1903 + }, + { + "epoch": 0.9410601754602743, + 
"grad_norm": 1.1514727386744015, + "learning_rate": 1.1807959460971958e-05, + "loss": 0.2781906723976135, + "step": 1904 + }, + { + "epoch": 0.9415544297541085, + "grad_norm": 1.2599759308188183, + "learning_rate": 1.1799922912530741e-05, + "loss": 0.3129916787147522, + "step": 1905 + }, + { + "epoch": 0.9420486840479426, + "grad_norm": 1.1353254220103308, + "learning_rate": 1.1791885162486705e-05, + "loss": 0.281986266374588, + "step": 1906 + }, + { + "epoch": 0.9425429383417768, + "grad_norm": 1.2313275172087987, + "learning_rate": 1.1783846216205734e-05, + "loss": 0.33587342500686646, + "step": 1907 + }, + { + "epoch": 0.943037192635611, + "grad_norm": 1.0408682927660702, + "learning_rate": 1.1775806079054522e-05, + "loss": 0.27715635299682617, + "step": 1908 + }, + { + "epoch": 0.9435314469294452, + "grad_norm": 1.1581221243071849, + "learning_rate": 1.1767764756400541e-05, + "loss": 0.3190307915210724, + "step": 1909 + }, + { + "epoch": 0.9440257012232793, + "grad_norm": 1.1962319364965919, + "learning_rate": 1.175972225361207e-05, + "loss": 0.29336807131767273, + "step": 1910 + }, + { + "epoch": 0.9445199555171135, + "grad_norm": 1.1448708364637925, + "learning_rate": 1.1751678576058164e-05, + "loss": 0.3001596927642822, + "step": 1911 + }, + { + "epoch": 0.9450142098109477, + "grad_norm": 1.0832545536390727, + "learning_rate": 1.1743633729108672e-05, + "loss": 0.26952457427978516, + "step": 1912 + }, + { + "epoch": 0.945508464104782, + "grad_norm": 1.166519142960908, + "learning_rate": 1.1735587718134212e-05, + "loss": 0.3193609118461609, + "step": 1913 + }, + { + "epoch": 0.946002718398616, + "grad_norm": 1.2095746348772163, + "learning_rate": 1.172754054850619e-05, + "loss": 0.2810664176940918, + "step": 1914 + }, + { + "epoch": 0.9464969726924503, + "grad_norm": 1.1743627712454017, + "learning_rate": 1.1719492225596783e-05, + "loss": 0.28850311040878296, + "step": 1915 + }, + { + "epoch": 0.9469912269862845, + "grad_norm": 1.1739524489187587, + 
"learning_rate": 1.1711442754778936e-05, + "loss": 0.32268932461738586, + "step": 1916 + }, + { + "epoch": 0.9474854812801187, + "grad_norm": 1.2236575262685914, + "learning_rate": 1.1703392141426356e-05, + "loss": 0.3149149715900421, + "step": 1917 + }, + { + "epoch": 0.9479797355739528, + "grad_norm": 1.0472038436966378, + "learning_rate": 1.1695340390913526e-05, + "loss": 0.2537482678890228, + "step": 1918 + }, + { + "epoch": 0.948473989867787, + "grad_norm": 1.1232208833213926, + "learning_rate": 1.168728750861567e-05, + "loss": 0.2611936330795288, + "step": 1919 + }, + { + "epoch": 0.9489682441616212, + "grad_norm": 1.0077623948815433, + "learning_rate": 1.1679233499908781e-05, + "loss": 0.263653427362442, + "step": 1920 + }, + { + "epoch": 0.9494624984554554, + "grad_norm": 1.1707561168968341, + "learning_rate": 1.1671178370169604e-05, + "loss": 0.3122594952583313, + "step": 1921 + }, + { + "epoch": 0.9499567527492895, + "grad_norm": 1.1924449722361925, + "learning_rate": 1.1663122124775626e-05, + "loss": 0.3101043701171875, + "step": 1922 + }, + { + "epoch": 0.9504510070431237, + "grad_norm": 1.129901320884474, + "learning_rate": 1.1655064769105077e-05, + "loss": 0.295572966337204, + "step": 1923 + }, + { + "epoch": 0.9509452613369579, + "grad_norm": 1.1537509505815167, + "learning_rate": 1.1647006308536937e-05, + "loss": 0.29732125997543335, + "step": 1924 + }, + { + "epoch": 0.951439515630792, + "grad_norm": 1.1914038253365087, + "learning_rate": 1.1638946748450922e-05, + "loss": 0.32320737838745117, + "step": 1925 + }, + { + "epoch": 0.9519337699246262, + "grad_norm": 1.2581984463314084, + "learning_rate": 1.1630886094227471e-05, + "loss": 0.3306753933429718, + "step": 1926 + }, + { + "epoch": 0.9524280242184604, + "grad_norm": 1.0367245477692144, + "learning_rate": 1.1622824351247767e-05, + "loss": 0.2368355095386505, + "step": 1927 + }, + { + "epoch": 0.9529222785122946, + "grad_norm": 1.2216253394681036, + "learning_rate": 1.1614761524893715e-05, + 
"loss": 0.28470784425735474, + "step": 1928 + }, + { + "epoch": 0.9534165328061287, + "grad_norm": 1.1721810384499396, + "learning_rate": 1.160669762054794e-05, + "loss": 0.34468895196914673, + "step": 1929 + }, + { + "epoch": 0.9539107870999629, + "grad_norm": 1.1277795177992218, + "learning_rate": 1.1598632643593787e-05, + "loss": 0.30562442541122437, + "step": 1930 + }, + { + "epoch": 0.9544050413937971, + "grad_norm": 1.2141650113141733, + "learning_rate": 1.159056659941533e-05, + "loss": 0.2861478924751282, + "step": 1931 + }, + { + "epoch": 0.9548992956876313, + "grad_norm": 1.0692532214940453, + "learning_rate": 1.1582499493397332e-05, + "loss": 0.32385969161987305, + "step": 1932 + }, + { + "epoch": 0.9553935499814654, + "grad_norm": 1.173323189937386, + "learning_rate": 1.1574431330925287e-05, + "loss": 0.2935449481010437, + "step": 1933 + }, + { + "epoch": 0.9558878042752996, + "grad_norm": 1.1041433205065538, + "learning_rate": 1.156636211738538e-05, + "loss": 0.29380083084106445, + "step": 1934 + }, + { + "epoch": 0.9563820585691338, + "grad_norm": 1.1455066452691371, + "learning_rate": 1.1558291858164503e-05, + "loss": 0.2957204282283783, + "step": 1935 + }, + { + "epoch": 0.956876312862968, + "grad_norm": 1.084977751415868, + "learning_rate": 1.1550220558650246e-05, + "loss": 0.26402851939201355, + "step": 1936 + }, + { + "epoch": 0.9573705671568021, + "grad_norm": 1.1085858464768976, + "learning_rate": 1.1542148224230897e-05, + "loss": 0.29163527488708496, + "step": 1937 + }, + { + "epoch": 0.9578648214506363, + "grad_norm": 1.2120558942254267, + "learning_rate": 1.1534074860295426e-05, + "loss": 0.302470326423645, + "step": 1938 + }, + { + "epoch": 0.9583590757444705, + "grad_norm": 1.1861857419569999, + "learning_rate": 1.15260004722335e-05, + "loss": 0.25946593284606934, + "step": 1939 + }, + { + "epoch": 0.9588533300383048, + "grad_norm": 1.1153985574382288, + "learning_rate": 1.1517925065435457e-05, + "loss": 0.2680559456348419, + "step": 1940 + 
}, + { + "epoch": 0.9593475843321388, + "grad_norm": 1.2104349484077064, + "learning_rate": 1.1509848645292334e-05, + "loss": 0.2684473991394043, + "step": 1941 + }, + { + "epoch": 0.959841838625973, + "grad_norm": 1.245187124369965, + "learning_rate": 1.1501771217195827e-05, + "loss": 0.2795519232749939, + "step": 1942 + }, + { + "epoch": 0.9603360929198073, + "grad_norm": 1.2532047895072767, + "learning_rate": 1.1493692786538313e-05, + "loss": 0.35209575295448303, + "step": 1943 + }, + { + "epoch": 0.9608303472136415, + "grad_norm": 1.176019791514668, + "learning_rate": 1.1485613358712839e-05, + "loss": 0.3058928847312927, + "step": 1944 + }, + { + "epoch": 0.9613246015074756, + "grad_norm": 1.103375830615649, + "learning_rate": 1.1477532939113112e-05, + "loss": 0.2889159619808197, + "step": 1945 + }, + { + "epoch": 0.9618188558013098, + "grad_norm": 1.175759039350938, + "learning_rate": 1.1469451533133506e-05, + "loss": 0.30782538652420044, + "step": 1946 + }, + { + "epoch": 0.962313110095144, + "grad_norm": 1.1326992133409532, + "learning_rate": 1.1461369146169052e-05, + "loss": 0.3091726005077362, + "step": 1947 + }, + { + "epoch": 0.9628073643889782, + "grad_norm": 1.2061917553730328, + "learning_rate": 1.1453285783615438e-05, + "loss": 0.3287050724029541, + "step": 1948 + }, + { + "epoch": 0.9633016186828123, + "grad_norm": 1.1941959404182023, + "learning_rate": 1.1445201450868998e-05, + "loss": 0.31267625093460083, + "step": 1949 + }, + { + "epoch": 0.9637958729766465, + "grad_norm": 1.1346278168962094, + "learning_rate": 1.1437116153326719e-05, + "loss": 0.30775952339172363, + "step": 1950 + }, + { + "epoch": 0.9642901272704807, + "grad_norm": 1.292541938462464, + "learning_rate": 1.142902989638623e-05, + "loss": 0.3825497329235077, + "step": 1951 + }, + { + "epoch": 0.9647843815643148, + "grad_norm": 1.0454710330230295, + "learning_rate": 1.1420942685445801e-05, + "loss": 0.2866062521934509, + "step": 1952 + }, + { + "epoch": 0.965278635858149, + 
"grad_norm": 1.144633580750803, + "learning_rate": 1.1412854525904335e-05, + "loss": 0.27787062525749207, + "step": 1953 + }, + { + "epoch": 0.9657728901519832, + "grad_norm": 1.1290436448297894, + "learning_rate": 1.1404765423161381e-05, + "loss": 0.302572101354599, + "step": 1954 + }, + { + "epoch": 0.9662671444458174, + "grad_norm": 1.0781086639824042, + "learning_rate": 1.1396675382617097e-05, + "loss": 0.29608359932899475, + "step": 1955 + }, + { + "epoch": 0.9667613987396515, + "grad_norm": 1.1646658995895742, + "learning_rate": 1.1388584409672285e-05, + "loss": 0.28057801723480225, + "step": 1956 + }, + { + "epoch": 0.9672556530334857, + "grad_norm": 1.1188617227766138, + "learning_rate": 1.1380492509728363e-05, + "loss": 0.29628869891166687, + "step": 1957 + }, + { + "epoch": 0.9677499073273199, + "grad_norm": 1.1207660926511307, + "learning_rate": 1.1372399688187365e-05, + "loss": 0.29254984855651855, + "step": 1958 + }, + { + "epoch": 0.9682441616211541, + "grad_norm": 1.10665523309967, + "learning_rate": 1.1364305950451946e-05, + "loss": 0.32925280928611755, + "step": 1959 + }, + { + "epoch": 0.9687384159149882, + "grad_norm": 1.108029328920716, + "learning_rate": 1.1356211301925367e-05, + "loss": 0.3072258234024048, + "step": 1960 + }, + { + "epoch": 0.9692326702088224, + "grad_norm": 1.1133536367191044, + "learning_rate": 1.1348115748011499e-05, + "loss": 0.29737845063209534, + "step": 1961 + }, + { + "epoch": 0.9697269245026566, + "grad_norm": 1.1169451234105505, + "learning_rate": 1.1340019294114822e-05, + "loss": 0.27369949221611023, + "step": 1962 + }, + { + "epoch": 0.9702211787964908, + "grad_norm": 1.2861478922811351, + "learning_rate": 1.1331921945640408e-05, + "loss": 0.33116602897644043, + "step": 1963 + }, + { + "epoch": 0.9707154330903249, + "grad_norm": 1.9398235156973715, + "learning_rate": 1.1323823707993937e-05, + "loss": 0.2620438039302826, + "step": 1964 + }, + { + "epoch": 0.9712096873841591, + "grad_norm": 1.1505189829247824, + 
"learning_rate": 1.1315724586581673e-05, + "loss": 0.3187680244445801, + "step": 1965 + }, + { + "epoch": 0.9717039416779933, + "grad_norm": 1.2391813787863328, + "learning_rate": 1.1307624586810472e-05, + "loss": 0.3675233721733093, + "step": 1966 + }, + { + "epoch": 0.9721981959718276, + "grad_norm": 1.2521490817049854, + "learning_rate": 1.1299523714087784e-05, + "loss": 0.31064945459365845, + "step": 1967 + }, + { + "epoch": 0.9726924502656616, + "grad_norm": 1.1166975993354054, + "learning_rate": 1.1291421973821632e-05, + "loss": 0.2941773235797882, + "step": 1968 + }, + { + "epoch": 0.9731867045594959, + "grad_norm": 1.2565504643296834, + "learning_rate": 1.128331937142062e-05, + "loss": 0.3443846106529236, + "step": 1969 + }, + { + "epoch": 0.9736809588533301, + "grad_norm": 1.1142268279429304, + "learning_rate": 1.1275215912293933e-05, + "loss": 0.2815151810646057, + "step": 1970 + }, + { + "epoch": 0.9741752131471643, + "grad_norm": 1.1622346059327586, + "learning_rate": 1.1267111601851327e-05, + "loss": 0.2886476516723633, + "step": 1971 + }, + { + "epoch": 0.9746694674409984, + "grad_norm": 1.0942194208380682, + "learning_rate": 1.1259006445503116e-05, + "loss": 0.2692835330963135, + "step": 1972 + }, + { + "epoch": 0.9751637217348326, + "grad_norm": 1.1112683317978183, + "learning_rate": 1.1250900448660192e-05, + "loss": 0.2748587727546692, + "step": 1973 + }, + { + "epoch": 0.9756579760286668, + "grad_norm": 1.192989589829818, + "learning_rate": 1.1242793616734002e-05, + "loss": 0.2963098883628845, + "step": 1974 + }, + { + "epoch": 0.976152230322501, + "grad_norm": 1.1305326657315258, + "learning_rate": 1.1234685955136552e-05, + "loss": 0.28353193402290344, + "step": 1975 + }, + { + "epoch": 0.9766464846163351, + "grad_norm": 1.1967273051238179, + "learning_rate": 1.1226577469280397e-05, + "loss": 0.3308493494987488, + "step": 1976 + }, + { + "epoch": 0.9771407389101693, + "grad_norm": 1.096933031801606, + "learning_rate": 1.1218468164578653e-05, + 
"loss": 0.26923754811286926, + "step": 1977 + }, + { + "epoch": 0.9776349932040035, + "grad_norm": 1.5091635403311783, + "learning_rate": 1.1210358046444968e-05, + "loss": 0.2730574905872345, + "step": 1978 + }, + { + "epoch": 0.9781292474978376, + "grad_norm": 1.1338996219219686, + "learning_rate": 1.1202247120293548e-05, + "loss": 0.26464858651161194, + "step": 1979 + }, + { + "epoch": 0.9786235017916718, + "grad_norm": 1.2694994457222093, + "learning_rate": 1.1194135391539127e-05, + "loss": 0.30095499753952026, + "step": 1980 + }, + { + "epoch": 0.979117756085506, + "grad_norm": 1.3227283597348862, + "learning_rate": 1.1186022865596983e-05, + "loss": 0.3418167233467102, + "step": 1981 + }, + { + "epoch": 0.9796120103793402, + "grad_norm": 1.2780598996117225, + "learning_rate": 1.117790954788292e-05, + "loss": 0.28735262155532837, + "step": 1982 + }, + { + "epoch": 0.9801062646731743, + "grad_norm": 1.109707631385258, + "learning_rate": 1.116979544381327e-05, + "loss": 0.26816800236701965, + "step": 1983 + }, + { + "epoch": 0.9806005189670085, + "grad_norm": 1.1873089360962268, + "learning_rate": 1.1161680558804897e-05, + "loss": 0.31004661321640015, + "step": 1984 + }, + { + "epoch": 0.9810947732608427, + "grad_norm": 1.2669673078204273, + "learning_rate": 1.1153564898275184e-05, + "loss": 0.33103084564208984, + "step": 1985 + }, + { + "epoch": 0.9815890275546769, + "grad_norm": 1.3375894512262838, + "learning_rate": 1.1145448467642021e-05, + "loss": 0.3804841637611389, + "step": 1986 + }, + { + "epoch": 0.982083281848511, + "grad_norm": 1.2029739003434823, + "learning_rate": 1.1137331272323834e-05, + "loss": 0.31861352920532227, + "step": 1987 + }, + { + "epoch": 0.9825775361423452, + "grad_norm": 1.1954996526655464, + "learning_rate": 1.1129213317739539e-05, + "loss": 0.3022298216819763, + "step": 1988 + }, + { + "epoch": 0.9830717904361794, + "grad_norm": 1.3466664334904774, + "learning_rate": 1.1121094609308564e-05, + "loss": 0.38203683495521545, + "step": 
1989 + }, + { + "epoch": 0.9835660447300136, + "grad_norm": 1.215882197519198, + "learning_rate": 1.1112975152450848e-05, + "loss": 0.3105717897415161, + "step": 1990 + }, + { + "epoch": 0.9840602990238477, + "grad_norm": 1.2066484647947713, + "learning_rate": 1.1104854952586827e-05, + "loss": 0.31930285692214966, + "step": 1991 + }, + { + "epoch": 0.9845545533176819, + "grad_norm": 1.1639723195264664, + "learning_rate": 1.1096734015137422e-05, + "loss": 0.3167966902256012, + "step": 1992 + }, + { + "epoch": 0.9850488076115161, + "grad_norm": 1.168704133231974, + "learning_rate": 1.1088612345524059e-05, + "loss": 0.2693050801753998, + "step": 1993 + }, + { + "epoch": 0.9855430619053503, + "grad_norm": 1.0985586655404702, + "learning_rate": 1.1080489949168651e-05, + "loss": 0.27986466884613037, + "step": 1994 + }, + { + "epoch": 0.9860373161991844, + "grad_norm": 1.1481757517161775, + "learning_rate": 1.1072366831493589e-05, + "loss": 0.26814526319503784, + "step": 1995 + }, + { + "epoch": 0.9865315704930187, + "grad_norm": 1.146921609246337, + "learning_rate": 1.1064242997921753e-05, + "loss": 0.31393951177597046, + "step": 1996 + }, + { + "epoch": 0.9870258247868529, + "grad_norm": 1.1375630444026625, + "learning_rate": 1.1056118453876496e-05, + "loss": 0.2958461344242096, + "step": 1997 + }, + { + "epoch": 0.9875200790806871, + "grad_norm": 1.137037421352785, + "learning_rate": 1.1047993204781652e-05, + "loss": 0.29744619131088257, + "step": 1998 + }, + { + "epoch": 0.9880143333745212, + "grad_norm": 1.1508003551512254, + "learning_rate": 1.1039867256061516e-05, + "loss": 0.29055094718933105, + "step": 1999 + }, + { + "epoch": 0.9885085876683554, + "grad_norm": 1.1632161121950038, + "learning_rate": 1.103174061314086e-05, + "loss": 0.29961663484573364, + "step": 2000 + }, + { + "epoch": 0.9890028419621896, + "grad_norm": 1.0841825843818378, + "learning_rate": 1.102361328144491e-05, + "loss": 0.34533610939979553, + "step": 2001 + }, + { + "epoch": 
0.9894970962560238, + "grad_norm": 1.1849596678411713, + "learning_rate": 1.1015485266399362e-05, + "loss": 0.2994460463523865, + "step": 2002 + }, + { + "epoch": 0.9899913505498579, + "grad_norm": 1.2325420364808024, + "learning_rate": 1.1007356573430357e-05, + "loss": 0.34309566020965576, + "step": 2003 + }, + { + "epoch": 0.9904856048436921, + "grad_norm": 1.2050309252665437, + "learning_rate": 1.09992272079645e-05, + "loss": 0.3049868643283844, + "step": 2004 + }, + { + "epoch": 0.9909798591375263, + "grad_norm": 1.1759703775328856, + "learning_rate": 1.0991097175428833e-05, + "loss": 0.30586326122283936, + "step": 2005 + }, + { + "epoch": 0.9914741134313604, + "grad_norm": 1.1997965130034223, + "learning_rate": 1.0982966481250854e-05, + "loss": 0.29740482568740845, + "step": 2006 + }, + { + "epoch": 0.9919683677251946, + "grad_norm": 1.2400023524315222, + "learning_rate": 1.0974835130858497e-05, + "loss": 0.3218206465244293, + "step": 2007 + }, + { + "epoch": 0.9924626220190288, + "grad_norm": 1.1309419286206777, + "learning_rate": 1.0966703129680139e-05, + "loss": 0.2747582495212555, + "step": 2008 + }, + { + "epoch": 0.992956876312863, + "grad_norm": 1.2581670135770728, + "learning_rate": 1.0958570483144578e-05, + "loss": 0.33215245604515076, + "step": 2009 + }, + { + "epoch": 0.9934511306066971, + "grad_norm": 1.2834058413633842, + "learning_rate": 1.0950437196681061e-05, + "loss": 0.3149756193161011, + "step": 2010 + }, + { + "epoch": 0.9939453849005313, + "grad_norm": 1.1001136330607295, + "learning_rate": 1.0942303275719253e-05, + "loss": 0.2763513922691345, + "step": 2011 + }, + { + "epoch": 0.9944396391943655, + "grad_norm": 1.0592905887432897, + "learning_rate": 1.0934168725689239e-05, + "loss": 0.2818325161933899, + "step": 2012 + }, + { + "epoch": 0.9949338934881997, + "grad_norm": 1.1079515754649163, + "learning_rate": 1.0926033552021533e-05, + "loss": 0.2659858167171478, + "step": 2013 + }, + { + "epoch": 0.9954281477820338, + "grad_norm": 
1.1926210163358253, + "learning_rate": 1.091789776014706e-05, + "loss": 0.30891451239585876, + "step": 2014 + }, + { + "epoch": 0.995922402075868, + "grad_norm": 1.2194298136031743, + "learning_rate": 1.0909761355497156e-05, + "loss": 0.33645111322402954, + "step": 2015 + }, + { + "epoch": 0.9964166563697022, + "grad_norm": 1.1110546475920504, + "learning_rate": 1.0901624343503571e-05, + "loss": 0.3086194097995758, + "step": 2016 + }, + { + "epoch": 0.9969109106635364, + "grad_norm": 1.0167201052564092, + "learning_rate": 1.089348672959846e-05, + "loss": 0.2614179253578186, + "step": 2017 + }, + { + "epoch": 0.9974051649573705, + "grad_norm": 1.2224853324284848, + "learning_rate": 1.088534851921437e-05, + "loss": 0.3300556540489197, + "step": 2018 + }, + { + "epoch": 0.9978994192512047, + "grad_norm": 1.1929848499106601, + "learning_rate": 1.087720971778426e-05, + "loss": 0.28443643450737, + "step": 2019 + }, + { + "epoch": 0.9983936735450389, + "grad_norm": 1.052677422924197, + "learning_rate": 1.0869070330741475e-05, + "loss": 0.2805534601211548, + "step": 2020 + }, + { + "epoch": 0.9988879278388731, + "grad_norm": 1.065568553175956, + "learning_rate": 1.0860930363519758e-05, + "loss": 0.28186699748039246, + "step": 2021 + }, + { + "epoch": 0.9993821821327072, + "grad_norm": 1.2171160812601536, + "learning_rate": 1.0852789821553228e-05, + "loss": 0.3527688980102539, + "step": 2022 + }, + { + "epoch": 0.9998764364265414, + "grad_norm": 1.2020406854373213, + "learning_rate": 1.08446487102764e-05, + "loss": 0.30708247423171997, + "step": 2023 + }, + { + "epoch": 1.0, + "grad_norm": 2.286184440614986, + "learning_rate": 1.083650703512416e-05, + "loss": 0.3015655279159546, + "step": 2024 + }, + { + "epoch": 1.0004942542938342, + "grad_norm": 1.2067651750081223, + "learning_rate": 1.0828364801531777e-05, + "loss": 0.29792484641075134, + "step": 2025 + }, + { + "epoch": 1.0009885085876684, + "grad_norm": 1.1529758757862274, + "learning_rate": 1.0820222014934887e-05, + 
"loss": 0.27995994687080383, + "step": 2026 + }, + { + "epoch": 1.0014827628815026, + "grad_norm": 1.115022133563525, + "learning_rate": 1.0812078680769501e-05, + "loss": 0.25797444581985474, + "step": 2027 + }, + { + "epoch": 1.0019770171753366, + "grad_norm": 1.1202805963305373, + "learning_rate": 1.0803934804471991e-05, + "loss": 0.2834373116493225, + "step": 2028 + }, + { + "epoch": 1.0024712714691708, + "grad_norm": 1.147731866533824, + "learning_rate": 1.079579039147909e-05, + "loss": 0.27055832743644714, + "step": 2029 + }, + { + "epoch": 1.002965525763005, + "grad_norm": 1.1916483552600579, + "learning_rate": 1.0787645447227897e-05, + "loss": 0.30029311776161194, + "step": 2030 + }, + { + "epoch": 1.0034597800568392, + "grad_norm": 1.1834514894044206, + "learning_rate": 1.0779499977155858e-05, + "loss": 0.2741442322731018, + "step": 2031 + }, + { + "epoch": 1.0039540343506734, + "grad_norm": 1.1233171341295944, + "learning_rate": 1.0771353986700767e-05, + "loss": 0.27097994089126587, + "step": 2032 + }, + { + "epoch": 1.0044482886445076, + "grad_norm": 1.1267943347727831, + "learning_rate": 1.0763207481300781e-05, + "loss": 0.2690125107765198, + "step": 2033 + }, + { + "epoch": 1.0049425429383418, + "grad_norm": 1.1312636860673373, + "learning_rate": 1.0755060466394383e-05, + "loss": 0.29656079411506653, + "step": 2034 + }, + { + "epoch": 1.005436797232176, + "grad_norm": 1.1729529368370135, + "learning_rate": 1.0746912947420407e-05, + "loss": 0.25291675329208374, + "step": 2035 + }, + { + "epoch": 1.00593105152601, + "grad_norm": 1.410951786073956, + "learning_rate": 1.0738764929818017e-05, + "loss": 0.26391562819480896, + "step": 2036 + }, + { + "epoch": 1.0064253058198442, + "grad_norm": 1.258204498994485, + "learning_rate": 1.073061641902672e-05, + "loss": 0.2850308418273926, + "step": 2037 + }, + { + "epoch": 1.0069195601136784, + "grad_norm": 1.1368887973206072, + "learning_rate": 1.0722467420486338e-05, + "loss": 0.2529013454914093, + "step": 2038 + 
}, + { + "epoch": 1.0074138144075127, + "grad_norm": 1.2420233139292696, + "learning_rate": 1.0714317939637028e-05, + "loss": 0.2577154040336609, + "step": 2039 + }, + { + "epoch": 1.0079080687013469, + "grad_norm": 1.1996492314644527, + "learning_rate": 1.0706167981919269e-05, + "loss": 0.28677526116371155, + "step": 2040 + }, + { + "epoch": 1.008402322995181, + "grad_norm": 1.210233649974949, + "learning_rate": 1.0698017552773859e-05, + "loss": 0.25146183371543884, + "step": 2041 + }, + { + "epoch": 1.0088965772890153, + "grad_norm": 1.217205041102825, + "learning_rate": 1.0689866657641899e-05, + "loss": 0.29958251118659973, + "step": 2042 + }, + { + "epoch": 1.0093908315828495, + "grad_norm": 1.2422486891064726, + "learning_rate": 1.0681715301964817e-05, + "loss": 0.28512266278266907, + "step": 2043 + }, + { + "epoch": 1.0098850858766835, + "grad_norm": 1.3312817373132209, + "learning_rate": 1.067356349118434e-05, + "loss": 0.29768145084381104, + "step": 2044 + }, + { + "epoch": 1.0103793401705177, + "grad_norm": 1.2397312600868813, + "learning_rate": 1.0665411230742498e-05, + "loss": 0.25144103169441223, + "step": 2045 + }, + { + "epoch": 1.0108735944643519, + "grad_norm": 1.6026936131359757, + "learning_rate": 1.0657258526081629e-05, + "loss": 0.2673259973526001, + "step": 2046 + }, + { + "epoch": 1.011367848758186, + "grad_norm": 1.2940971813114743, + "learning_rate": 1.0649105382644359e-05, + "loss": 0.2845848500728607, + "step": 2047 + }, + { + "epoch": 1.0118621030520203, + "grad_norm": 1.0898574113835153, + "learning_rate": 1.0640951805873607e-05, + "loss": 0.2569392919540405, + "step": 2048 + }, + { + "epoch": 1.0123563573458545, + "grad_norm": 1.2632947550014098, + "learning_rate": 1.0632797801212591e-05, + "loss": 0.250387966632843, + "step": 2049 + }, + { + "epoch": 1.0128506116396887, + "grad_norm": 1.233630096360243, + "learning_rate": 1.0624643374104804e-05, + "loss": 0.28228282928466797, + "step": 2050 + }, + { + "epoch": 1.0133448659335227, + 
"grad_norm": 1.0888042979148498, + "learning_rate": 1.0616488529994024e-05, + "loss": 0.24724754691123962, + "step": 2051 + }, + { + "epoch": 1.013839120227357, + "grad_norm": 1.2576287774069197, + "learning_rate": 1.0608333274324312e-05, + "loss": 0.268532395362854, + "step": 2052 + }, + { + "epoch": 1.014333374521191, + "grad_norm": 1.1578525571147846, + "learning_rate": 1.0600177612539995e-05, + "loss": 0.27454662322998047, + "step": 2053 + }, + { + "epoch": 1.0148276288150253, + "grad_norm": 1.2050116136682636, + "learning_rate": 1.0592021550085683e-05, + "loss": 0.27497538924217224, + "step": 2054 + }, + { + "epoch": 1.0153218831088595, + "grad_norm": 1.1358282649300115, + "learning_rate": 1.0583865092406237e-05, + "loss": 0.24480152130126953, + "step": 2055 + }, + { + "epoch": 1.0158161374026937, + "grad_norm": 1.1352545460867702, + "learning_rate": 1.0575708244946805e-05, + "loss": 0.23754069209098816, + "step": 2056 + }, + { + "epoch": 1.016310391696528, + "grad_norm": 1.150720407382798, + "learning_rate": 1.056755101315277e-05, + "loss": 0.24541275203227997, + "step": 2057 + }, + { + "epoch": 1.0168046459903621, + "grad_norm": 1.2022551315194179, + "learning_rate": 1.055939340246979e-05, + "loss": 0.27724504470825195, + "step": 2058 + }, + { + "epoch": 1.0172989002841961, + "grad_norm": 1.2400168112160508, + "learning_rate": 1.0551235418343766e-05, + "loss": 0.2869918942451477, + "step": 2059 + }, + { + "epoch": 1.0177931545780303, + "grad_norm": 1.2299839323583324, + "learning_rate": 1.0543077066220854e-05, + "loss": 0.27153679728507996, + "step": 2060 + }, + { + "epoch": 1.0182874088718645, + "grad_norm": 1.1366017541860491, + "learning_rate": 1.0534918351547454e-05, + "loss": 0.2611347436904907, + "step": 2061 + }, + { + "epoch": 1.0187816631656987, + "grad_norm": 1.1317421431613228, + "learning_rate": 1.0526759279770202e-05, + "loss": 0.26649200916290283, + "step": 2062 + }, + { + "epoch": 1.019275917459533, + "grad_norm": 1.0930466767865903, + 
"learning_rate": 1.0518599856335983e-05, + "loss": 0.25164204835891724, + "step": 2063 + }, + { + "epoch": 1.0197701717533671, + "grad_norm": 1.2027289451385044, + "learning_rate": 1.0510440086691911e-05, + "loss": 0.288251131772995, + "step": 2064 + }, + { + "epoch": 1.0202644260472014, + "grad_norm": 1.2837951062377317, + "learning_rate": 1.0502279976285325e-05, + "loss": 0.27177444100379944, + "step": 2065 + }, + { + "epoch": 1.0207586803410356, + "grad_norm": 1.222948820556725, + "learning_rate": 1.0494119530563812e-05, + "loss": 0.2723502218723297, + "step": 2066 + }, + { + "epoch": 1.0212529346348695, + "grad_norm": 1.214398839170698, + "learning_rate": 1.0485958754975156e-05, + "loss": 0.2704971432685852, + "step": 2067 + }, + { + "epoch": 1.0217471889287038, + "grad_norm": 1.267114179641731, + "learning_rate": 1.0477797654967376e-05, + "loss": 0.30302050709724426, + "step": 2068 + }, + { + "epoch": 1.022241443222538, + "grad_norm": 1.268227752862744, + "learning_rate": 1.0469636235988711e-05, + "loss": 0.26408523321151733, + "step": 2069 + }, + { + "epoch": 1.0227356975163722, + "grad_norm": 1.2197627847133865, + "learning_rate": 1.0461474503487606e-05, + "loss": 0.2691786289215088, + "step": 2070 + }, + { + "epoch": 1.0232299518102064, + "grad_norm": 1.2792531550605064, + "learning_rate": 1.0453312462912714e-05, + "loss": 0.2823137640953064, + "step": 2071 + }, + { + "epoch": 1.0237242061040406, + "grad_norm": 1.2027503273852609, + "learning_rate": 1.04451501197129e-05, + "loss": 0.28837013244628906, + "step": 2072 + }, + { + "epoch": 1.0242184603978748, + "grad_norm": 1.27109994402604, + "learning_rate": 1.0436987479337229e-05, + "loss": 0.2809562683105469, + "step": 2073 + }, + { + "epoch": 1.024712714691709, + "grad_norm": 1.240431430170138, + "learning_rate": 1.0428824547234956e-05, + "loss": 0.2604525685310364, + "step": 2074 + }, + { + "epoch": 1.025206968985543, + "grad_norm": 1.1799966275921325, + "learning_rate": 1.0420661328855546e-05, + "loss": 
0.24755606055259705, + "step": 2075 + }, + { + "epoch": 1.0257012232793772, + "grad_norm": 1.148092531592558, + "learning_rate": 1.0412497829648642e-05, + "loss": 0.2592730224132538, + "step": 2076 + }, + { + "epoch": 1.0261954775732114, + "grad_norm": 1.2356689091758393, + "learning_rate": 1.0404334055064083e-05, + "loss": 0.2693594694137573, + "step": 2077 + }, + { + "epoch": 1.0266897318670456, + "grad_norm": 1.2195187999450414, + "learning_rate": 1.0396170010551881e-05, + "loss": 0.2712753117084503, + "step": 2078 + }, + { + "epoch": 1.0271839861608798, + "grad_norm": 1.1741285828383992, + "learning_rate": 1.0388005701562245e-05, + "loss": 0.2693077027797699, + "step": 2079 + }, + { + "epoch": 1.027678240454714, + "grad_norm": 1.2670826968894364, + "learning_rate": 1.0379841133545544e-05, + "loss": 0.2791144847869873, + "step": 2080 + }, + { + "epoch": 1.0281724947485482, + "grad_norm": 1.163594554813514, + "learning_rate": 1.037167631195233e-05, + "loss": 0.27496254444122314, + "step": 2081 + }, + { + "epoch": 1.0286667490423822, + "grad_norm": 1.1305894692188725, + "learning_rate": 1.0363511242233322e-05, + "loss": 0.26037347316741943, + "step": 2082 + }, + { + "epoch": 1.0291610033362164, + "grad_norm": 1.2085934995349474, + "learning_rate": 1.0355345929839402e-05, + "loss": 0.2610514760017395, + "step": 2083 + }, + { + "epoch": 1.0296552576300506, + "grad_norm": 1.1531883738354434, + "learning_rate": 1.0347180380221618e-05, + "loss": 0.24750857055187225, + "step": 2084 + }, + { + "epoch": 1.0301495119238848, + "grad_norm": 1.2017075670935908, + "learning_rate": 1.0339014598831169e-05, + "loss": 0.2835415303707123, + "step": 2085 + }, + { + "epoch": 1.030643766217719, + "grad_norm": 1.2153811049556569, + "learning_rate": 1.033084859111942e-05, + "loss": 0.25762057304382324, + "step": 2086 + }, + { + "epoch": 1.0311380205115532, + "grad_norm": 1.3245241554987517, + "learning_rate": 1.032268236253788e-05, + "loss": 0.2818237841129303, + "step": 2087 + }, + { + 
"epoch": 1.0316322748053874, + "grad_norm": 1.2402911628462394, + "learning_rate": 1.0314515918538202e-05, + "loss": 0.27192944288253784, + "step": 2088 + }, + { + "epoch": 1.0321265290992216, + "grad_norm": 1.1715597954552734, + "learning_rate": 1.0306349264572195e-05, + "loss": 0.3002319931983948, + "step": 2089 + }, + { + "epoch": 1.0326207833930556, + "grad_norm": 1.221598051409306, + "learning_rate": 1.0298182406091794e-05, + "loss": 0.27106401324272156, + "step": 2090 + }, + { + "epoch": 1.0331150376868898, + "grad_norm": 1.2123644146814079, + "learning_rate": 1.0290015348549076e-05, + "loss": 0.2740558385848999, + "step": 2091 + }, + { + "epoch": 1.033609291980724, + "grad_norm": 1.2394453454529126, + "learning_rate": 1.0281848097396261e-05, + "loss": 0.2970008850097656, + "step": 2092 + }, + { + "epoch": 1.0341035462745582, + "grad_norm": 1.2003549808286662, + "learning_rate": 1.027368065808568e-05, + "loss": 0.27684125304222107, + "step": 2093 + }, + { + "epoch": 1.0345978005683925, + "grad_norm": 1.1371538472805924, + "learning_rate": 1.0265513036069803e-05, + "loss": 0.2732700705528259, + "step": 2094 + }, + { + "epoch": 1.0350920548622267, + "grad_norm": 1.1448190493490698, + "learning_rate": 1.0257345236801215e-05, + "loss": 0.25189805030822754, + "step": 2095 + }, + { + "epoch": 1.0355863091560609, + "grad_norm": 1.1221327830153236, + "learning_rate": 1.0249177265732629e-05, + "loss": 0.3177054524421692, + "step": 2096 + }, + { + "epoch": 1.036080563449895, + "grad_norm": 1.0492479192600686, + "learning_rate": 1.0241009128316854e-05, + "loss": 0.23350921273231506, + "step": 2097 + }, + { + "epoch": 1.036574817743729, + "grad_norm": 1.2565303796372052, + "learning_rate": 1.0232840830006832e-05, + "loss": 0.3011140525341034, + "step": 2098 + }, + { + "epoch": 1.0370690720375633, + "grad_norm": 1.164329016307231, + "learning_rate": 1.0224672376255598e-05, + "loss": 0.2578561305999756, + "step": 2099 + }, + { + "epoch": 1.0375633263313975, + "grad_norm": 
1.1701632763887444, + "learning_rate": 1.0216503772516297e-05, + "loss": 0.2622804045677185, + "step": 2100 + }, + { + "epoch": 1.0380575806252317, + "grad_norm": 1.219987069304434, + "learning_rate": 1.0208335024242169e-05, + "loss": 0.2662869691848755, + "step": 2101 + }, + { + "epoch": 1.0385518349190659, + "grad_norm": 1.2303351498865798, + "learning_rate": 1.0200166136886558e-05, + "loss": 0.27084922790527344, + "step": 2102 + }, + { + "epoch": 1.0390460892129, + "grad_norm": 1.2434849653646893, + "learning_rate": 1.0191997115902891e-05, + "loss": 0.26290780305862427, + "step": 2103 + }, + { + "epoch": 1.0395403435067343, + "grad_norm": 1.192171896111284, + "learning_rate": 1.0183827966744694e-05, + "loss": 0.27367106080055237, + "step": 2104 + }, + { + "epoch": 1.0400345978005685, + "grad_norm": 1.2706879657010888, + "learning_rate": 1.0175658694865574e-05, + "loss": 0.28507113456726074, + "step": 2105 + }, + { + "epoch": 1.0405288520944025, + "grad_norm": 1.2299041683114893, + "learning_rate": 1.0167489305719221e-05, + "loss": 0.2533179521560669, + "step": 2106 + }, + { + "epoch": 1.0410231063882367, + "grad_norm": 1.2546449586851505, + "learning_rate": 1.0159319804759398e-05, + "loss": 0.28755924105644226, + "step": 2107 + }, + { + "epoch": 1.041517360682071, + "grad_norm": 1.1726176332749902, + "learning_rate": 1.015115019743995e-05, + "loss": 0.26722773909568787, + "step": 2108 + }, + { + "epoch": 1.042011614975905, + "grad_norm": 1.3986075029095133, + "learning_rate": 1.0142980489214788e-05, + "loss": 0.3122308850288391, + "step": 2109 + }, + { + "epoch": 1.0425058692697393, + "grad_norm": 1.1273960807987882, + "learning_rate": 1.0134810685537899e-05, + "loss": 0.22603261470794678, + "step": 2110 + }, + { + "epoch": 1.0430001235635735, + "grad_norm": 1.1517998097919544, + "learning_rate": 1.0126640791863316e-05, + "loss": 0.2823299169540405, + "step": 2111 + }, + { + "epoch": 1.0434943778574077, + "grad_norm": 1.3191906526904469, + "learning_rate": 
1.0118470813645156e-05, + "loss": 0.30999040603637695, + "step": 2112 + }, + { + "epoch": 1.0439886321512417, + "grad_norm": 1.1820148857556874, + "learning_rate": 1.0110300756337569e-05, + "loss": 0.266022264957428, + "step": 2113 + }, + { + "epoch": 1.044482886445076, + "grad_norm": 1.6608098375974347, + "learning_rate": 1.0102130625394776e-05, + "loss": 0.2674095034599304, + "step": 2114 + }, + { + "epoch": 1.0449771407389101, + "grad_norm": 1.2172826939531747, + "learning_rate": 1.0093960426271037e-05, + "loss": 0.30045652389526367, + "step": 2115 + }, + { + "epoch": 1.0454713950327443, + "grad_norm": 1.1782919874699391, + "learning_rate": 1.0085790164420659e-05, + "loss": 0.28455668687820435, + "step": 2116 + }, + { + "epoch": 1.0459656493265785, + "grad_norm": 1.1749948852757104, + "learning_rate": 1.0077619845297992e-05, + "loss": 0.2429066300392151, + "step": 2117 + }, + { + "epoch": 1.0464599036204127, + "grad_norm": 1.1453766958637177, + "learning_rate": 1.0069449474357427e-05, + "loss": 0.2515121102333069, + "step": 2118 + }, + { + "epoch": 1.046954157914247, + "grad_norm": 1.234414346344525, + "learning_rate": 1.0061279057053385e-05, + "loss": 0.30011802911758423, + "step": 2119 + }, + { + "epoch": 1.0474484122080812, + "grad_norm": 1.1997300836338318, + "learning_rate": 1.005310859884032e-05, + "loss": 0.2577645480632782, + "step": 2120 + }, + { + "epoch": 1.0479426665019151, + "grad_norm": 1.0391250618888572, + "learning_rate": 1.0044938105172713e-05, + "loss": 0.21476465463638306, + "step": 2121 + }, + { + "epoch": 1.0484369207957493, + "grad_norm": 1.3902782329860977, + "learning_rate": 1.0036767581505067e-05, + "loss": 0.2587023079395294, + "step": 2122 + }, + { + "epoch": 1.0489311750895836, + "grad_norm": 1.1311469001510768, + "learning_rate": 1.0028597033291911e-05, + "loss": 0.2537185251712799, + "step": 2123 + }, + { + "epoch": 1.0494254293834178, + "grad_norm": 1.0410406857423857, + "learning_rate": 1.0020426465987782e-05, + "loss": 
0.24486014246940613, + "step": 2124 + }, + { + "epoch": 1.049919683677252, + "grad_norm": 1.4376390907817962, + "learning_rate": 1.0012255885047241e-05, + "loss": 0.2728436589241028, + "step": 2125 + }, + { + "epoch": 1.0504139379710862, + "grad_norm": 1.3186765660198476, + "learning_rate": 1.0004085295924843e-05, + "loss": 0.30238842964172363, + "step": 2126 + }, + { + "epoch": 1.0509081922649204, + "grad_norm": 1.2910923396564535, + "learning_rate": 9.99591470407516e-06, + "loss": 0.30347609519958496, + "step": 2127 + }, + { + "epoch": 1.0514024465587544, + "grad_norm": 1.2188667375190219, + "learning_rate": 9.987744114952764e-06, + "loss": 0.2581411302089691, + "step": 2128 + }, + { + "epoch": 1.0518967008525886, + "grad_norm": 1.2560629408792487, + "learning_rate": 9.979573534012218e-06, + "loss": 0.239881694316864, + "step": 2129 + }, + { + "epoch": 1.0523909551464228, + "grad_norm": 1.2977893982324902, + "learning_rate": 9.971402966708092e-06, + "loss": 0.3058615028858185, + "step": 2130 + }, + { + "epoch": 1.052885209440257, + "grad_norm": 1.2842102843103194, + "learning_rate": 9.963232418494936e-06, + "loss": 0.25285837054252625, + "step": 2131 + }, + { + "epoch": 1.0533794637340912, + "grad_norm": 1.2217652802535364, + "learning_rate": 9.955061894827294e-06, + "loss": 0.27366510033607483, + "step": 2132 + }, + { + "epoch": 1.0538737180279254, + "grad_norm": 1.1489983530266883, + "learning_rate": 9.946891401159683e-06, + "loss": 0.22268086671829224, + "step": 2133 + }, + { + "epoch": 1.0543679723217596, + "grad_norm": 1.1461059074650484, + "learning_rate": 9.938720942946616e-06, + "loss": 0.2540682554244995, + "step": 2134 + }, + { + "epoch": 1.0548622266155938, + "grad_norm": 1.2357731632052622, + "learning_rate": 9.930550525642576e-06, + "loss": 0.262179970741272, + "step": 2135 + }, + { + "epoch": 1.0553564809094278, + "grad_norm": 1.2267299487839205, + "learning_rate": 9.92238015470201e-06, + "loss": 0.25471946597099304, + "step": 2136 + }, + { + 
"epoch": 1.055850735203262, + "grad_norm": 1.162352058446371, + "learning_rate": 9.914209835579344e-06, + "loss": 0.2580556571483612, + "step": 2137 + }, + { + "epoch": 1.0563449894970962, + "grad_norm": 1.261401071852413, + "learning_rate": 9.906039573728964e-06, + "loss": 0.29909616708755493, + "step": 2138 + }, + { + "epoch": 1.0568392437909304, + "grad_norm": 1.2162562018595562, + "learning_rate": 9.897869374605226e-06, + "loss": 0.2828724980354309, + "step": 2139 + }, + { + "epoch": 1.0573334980847646, + "grad_norm": 1.2076714268656592, + "learning_rate": 9.889699243662433e-06, + "loss": 0.26731711626052856, + "step": 2140 + }, + { + "epoch": 1.0578277523785988, + "grad_norm": 1.2666827338430986, + "learning_rate": 9.88152918635485e-06, + "loss": 0.2912555932998657, + "step": 2141 + }, + { + "epoch": 1.058322006672433, + "grad_norm": 1.1593053736993435, + "learning_rate": 9.873359208136685e-06, + "loss": 0.2335313856601715, + "step": 2142 + }, + { + "epoch": 1.0588162609662672, + "grad_norm": 1.2934128795704303, + "learning_rate": 9.865189314462105e-06, + "loss": 0.2716987729072571, + "step": 2143 + }, + { + "epoch": 1.0593105152601012, + "grad_norm": 1.3251488161911162, + "learning_rate": 9.857019510785215e-06, + "loss": 0.2919968068599701, + "step": 2144 + }, + { + "epoch": 1.0598047695539354, + "grad_norm": 1.197230535187453, + "learning_rate": 9.848849802560057e-06, + "loss": 0.26279503107070923, + "step": 2145 + }, + { + "epoch": 1.0602990238477696, + "grad_norm": 1.263871154668556, + "learning_rate": 9.840680195240606e-06, + "loss": 0.31622597575187683, + "step": 2146 + }, + { + "epoch": 1.0607932781416038, + "grad_norm": 1.270948260835911, + "learning_rate": 9.832510694280782e-06, + "loss": 0.2399556040763855, + "step": 2147 + }, + { + "epoch": 1.061287532435438, + "grad_norm": 1.2181574543701559, + "learning_rate": 9.824341305134428e-06, + "loss": 0.2650333046913147, + "step": 2148 + }, + { + "epoch": 1.0617817867292723, + "grad_norm": 
1.274348887888969, + "learning_rate": 9.816172033255307e-06, + "loss": 0.26629161834716797, + "step": 2149 + }, + { + "epoch": 1.0622760410231065, + "grad_norm": 1.2611051957138737, + "learning_rate": 9.808002884097109e-06, + "loss": 0.28042545914649963, + "step": 2150 + }, + { + "epoch": 1.0627702953169407, + "grad_norm": 1.1495131020915084, + "learning_rate": 9.799833863113445e-06, + "loss": 0.24374082684516907, + "step": 2151 + }, + { + "epoch": 1.0632645496107747, + "grad_norm": 1.1048551979398207, + "learning_rate": 9.791664975757835e-06, + "loss": 0.23013898730278015, + "step": 2152 + }, + { + "epoch": 1.0637588039046089, + "grad_norm": 1.4072884886903234, + "learning_rate": 9.783496227483706e-06, + "loss": 0.25313276052474976, + "step": 2153 + }, + { + "epoch": 1.064253058198443, + "grad_norm": 1.248155174046862, + "learning_rate": 9.775327623744403e-06, + "loss": 0.2642362713813782, + "step": 2154 + }, + { + "epoch": 1.0647473124922773, + "grad_norm": 1.1405325090848468, + "learning_rate": 9.76715916999317e-06, + "loss": 0.2417108118534088, + "step": 2155 + }, + { + "epoch": 1.0652415667861115, + "grad_norm": 1.2556215450887547, + "learning_rate": 9.758990871683148e-06, + "loss": 0.25653502345085144, + "step": 2156 + }, + { + "epoch": 1.0657358210799457, + "grad_norm": 1.22877547041534, + "learning_rate": 9.750822734267378e-06, + "loss": 0.247604638338089, + "step": 2157 + }, + { + "epoch": 1.06623007537378, + "grad_norm": 1.2330600407976389, + "learning_rate": 9.742654763198786e-06, + "loss": 0.2675636112689972, + "step": 2158 + }, + { + "epoch": 1.0667243296676139, + "grad_norm": 1.230290211943024, + "learning_rate": 9.7344869639302e-06, + "loss": 0.2570686340332031, + "step": 2159 + }, + { + "epoch": 1.067218583961448, + "grad_norm": 1.4290278531414855, + "learning_rate": 9.726319341914323e-06, + "loss": 0.3046165704727173, + "step": 2160 + }, + { + "epoch": 1.0677128382552823, + "grad_norm": 1.3759048148010737, + "learning_rate": 9.718151902603744e-06, 
+ "loss": 0.24278515577316284, + "step": 2161 + }, + { + "epoch": 1.0682070925491165, + "grad_norm": 1.235098490769484, + "learning_rate": 9.709984651450924e-06, + "loss": 0.2565615773200989, + "step": 2162 + }, + { + "epoch": 1.0687013468429507, + "grad_norm": 1.3303607886608886, + "learning_rate": 9.701817593908209e-06, + "loss": 0.2672972083091736, + "step": 2163 + }, + { + "epoch": 1.069195601136785, + "grad_norm": 1.1620974642583077, + "learning_rate": 9.693650735427808e-06, + "loss": 0.21376445889472961, + "step": 2164 + }, + { + "epoch": 1.0696898554306191, + "grad_norm": 1.2628274098639385, + "learning_rate": 9.685484081461802e-06, + "loss": 0.27743393182754517, + "step": 2165 + }, + { + "epoch": 1.0701841097244533, + "grad_norm": 1.3615817033316626, + "learning_rate": 9.677317637462125e-06, + "loss": 0.2747134566307068, + "step": 2166 + }, + { + "epoch": 1.0706783640182873, + "grad_norm": 1.1533673233774355, + "learning_rate": 9.669151408880581e-06, + "loss": 0.2775312066078186, + "step": 2167 + }, + { + "epoch": 1.0711726183121215, + "grad_norm": 1.392383813550365, + "learning_rate": 9.660985401168833e-06, + "loss": 0.2743167281150818, + "step": 2168 + }, + { + "epoch": 1.0716668726059557, + "grad_norm": 1.1731022030570613, + "learning_rate": 9.652819619778387e-06, + "loss": 0.26030686497688293, + "step": 2169 + }, + { + "epoch": 1.07216112689979, + "grad_norm": 1.2886350622041207, + "learning_rate": 9.644654070160603e-06, + "loss": 0.32307812571525574, + "step": 2170 + }, + { + "epoch": 1.0726553811936241, + "grad_norm": 1.309807945595821, + "learning_rate": 9.63648875776668e-06, + "loss": 0.2773011028766632, + "step": 2171 + }, + { + "epoch": 1.0731496354874583, + "grad_norm": 1.3767412291020849, + "learning_rate": 9.628323688047672e-06, + "loss": 0.27996528148651123, + "step": 2172 + }, + { + "epoch": 1.0736438897812925, + "grad_norm": 1.176261909375135, + "learning_rate": 9.620158866454459e-06, + "loss": 0.28022176027297974, + "step": 2173 + }, + { + 
"epoch": 1.0741381440751268, + "grad_norm": 1.1746327357052728, + "learning_rate": 9.61199429843776e-06, + "loss": 0.2688876986503601, + "step": 2174 + }, + { + "epoch": 1.0746323983689607, + "grad_norm": 1.1454924799354713, + "learning_rate": 9.60382998944812e-06, + "loss": 0.23915211856365204, + "step": 2175 + }, + { + "epoch": 1.075126652662795, + "grad_norm": 1.1770664027196904, + "learning_rate": 9.59566594493592e-06, + "loss": 0.2533806264400482, + "step": 2176 + }, + { + "epoch": 1.0756209069566292, + "grad_norm": 1.2321355277799408, + "learning_rate": 9.587502170351361e-06, + "loss": 0.2887522876262665, + "step": 2177 + }, + { + "epoch": 1.0761151612504634, + "grad_norm": 1.2169372388289537, + "learning_rate": 9.579338671144459e-06, + "loss": 0.2885408401489258, + "step": 2178 + }, + { + "epoch": 1.0766094155442976, + "grad_norm": 1.2209492195717289, + "learning_rate": 9.571175452765045e-06, + "loss": 0.25656914710998535, + "step": 2179 + }, + { + "epoch": 1.0771036698381318, + "grad_norm": 1.2669016448608037, + "learning_rate": 9.563012520662773e-06, + "loss": 0.2935143709182739, + "step": 2180 + }, + { + "epoch": 1.077597924131966, + "grad_norm": 1.2902152081672096, + "learning_rate": 9.554849880287103e-06, + "loss": 0.26728200912475586, + "step": 2181 + }, + { + "epoch": 1.0780921784258002, + "grad_norm": 1.4327778934971358, + "learning_rate": 9.546687537087287e-06, + "loss": 0.2558351159095764, + "step": 2182 + }, + { + "epoch": 1.0785864327196342, + "grad_norm": 1.133861673349663, + "learning_rate": 9.538525496512394e-06, + "loss": 0.2517240047454834, + "step": 2183 + }, + { + "epoch": 1.0790806870134684, + "grad_norm": 1.1033603168250732, + "learning_rate": 9.53036376401129e-06, + "loss": 0.23258647322654724, + "step": 2184 + }, + { + "epoch": 1.0795749413073026, + "grad_norm": 1.2016172891455823, + "learning_rate": 9.522202345032627e-06, + "loss": 0.24100016057491302, + "step": 2185 + }, + { + "epoch": 1.0800691956011368, + "grad_norm": 
1.1844138198826075, + "learning_rate": 9.51404124502485e-06, + "loss": 0.27807697653770447, + "step": 2186 + }, + { + "epoch": 1.080563449894971, + "grad_norm": 1.2045646158236256, + "learning_rate": 9.50588046943619e-06, + "loss": 0.26146867871284485, + "step": 2187 + }, + { + "epoch": 1.0810577041888052, + "grad_norm": 1.3792610621050578, + "learning_rate": 9.497720023714675e-06, + "loss": 0.28570955991744995, + "step": 2188 + }, + { + "epoch": 1.0815519584826394, + "grad_norm": 1.146591161630138, + "learning_rate": 9.489559913308092e-06, + "loss": 0.22583246231079102, + "step": 2189 + }, + { + "epoch": 1.0820462127764734, + "grad_norm": 1.2292468406383597, + "learning_rate": 9.48140014366402e-06, + "loss": 0.27526232600212097, + "step": 2190 + }, + { + "epoch": 1.0825404670703076, + "grad_norm": 1.287410242270342, + "learning_rate": 9.473240720229803e-06, + "loss": 0.2777514159679413, + "step": 2191 + }, + { + "epoch": 1.0830347213641418, + "grad_norm": 1.217692620890676, + "learning_rate": 9.465081648452549e-06, + "loss": 0.25767001509666443, + "step": 2192 + }, + { + "epoch": 1.083528975657976, + "grad_norm": 1.2401214064051047, + "learning_rate": 9.456922933779148e-06, + "loss": 0.24114865064620972, + "step": 2193 + }, + { + "epoch": 1.0840232299518102, + "grad_norm": 1.3343620945353547, + "learning_rate": 9.448764581656237e-06, + "loss": 0.31198200583457947, + "step": 2194 + }, + { + "epoch": 1.0845174842456444, + "grad_norm": 1.2865355942160217, + "learning_rate": 9.440606597530213e-06, + "loss": 0.2724478840827942, + "step": 2195 + }, + { + "epoch": 1.0850117385394786, + "grad_norm": 1.2982367761916904, + "learning_rate": 9.432448986847229e-06, + "loss": 0.27796900272369385, + "step": 2196 + }, + { + "epoch": 1.0855059928333128, + "grad_norm": 1.293883522594156, + "learning_rate": 9.424291755053198e-06, + "loss": 0.2877587676048279, + "step": 2197 + }, + { + "epoch": 1.0860002471271468, + "grad_norm": 1.354561961211439, + "learning_rate": 
9.416134907593764e-06, + "loss": 0.2898337244987488, + "step": 2198 + }, + { + "epoch": 1.086494501420981, + "grad_norm": 1.2931825621227928, + "learning_rate": 9.407978449914322e-06, + "loss": 0.2544672191143036, + "step": 2199 + }, + { + "epoch": 1.0869887557148152, + "grad_norm": 1.2905943399481439, + "learning_rate": 9.399822387460005e-06, + "loss": 0.28336071968078613, + "step": 2200 + }, + { + "epoch": 1.0874830100086494, + "grad_norm": 1.2871287196611743, + "learning_rate": 9.391666725675691e-06, + "loss": 0.2862734794616699, + "step": 2201 + }, + { + "epoch": 1.0879772643024836, + "grad_norm": 1.386969000020192, + "learning_rate": 9.383511470005978e-06, + "loss": 0.26331260800361633, + "step": 2202 + }, + { + "epoch": 1.0884715185963179, + "grad_norm": 1.2750467510922643, + "learning_rate": 9.375356625895201e-06, + "loss": 0.30087417364120483, + "step": 2203 + }, + { + "epoch": 1.088965772890152, + "grad_norm": 1.3434362766675538, + "learning_rate": 9.36720219878741e-06, + "loss": 0.2736594080924988, + "step": 2204 + }, + { + "epoch": 1.089460027183986, + "grad_norm": 1.4852243291487657, + "learning_rate": 9.359048194126395e-06, + "loss": 0.2704418897628784, + "step": 2205 + }, + { + "epoch": 1.0899542814778203, + "grad_norm": 1.2230094225693318, + "learning_rate": 9.350894617355645e-06, + "loss": 0.24540236592292786, + "step": 2206 + }, + { + "epoch": 1.0904485357716545, + "grad_norm": 1.2299505503288506, + "learning_rate": 9.342741473918375e-06, + "loss": 0.26376527547836304, + "step": 2207 + }, + { + "epoch": 1.0909427900654887, + "grad_norm": 1.0803859595224048, + "learning_rate": 9.334588769257502e-06, + "loss": 0.24062004685401917, + "step": 2208 + }, + { + "epoch": 1.0914370443593229, + "grad_norm": 1.1443970874822365, + "learning_rate": 9.326436508815662e-06, + "loss": 0.24209418892860413, + "step": 2209 + }, + { + "epoch": 1.091931298653157, + "grad_norm": 1.3414968412819865, + "learning_rate": 9.318284698035188e-06, + "loss": 0.2732285261154175, + 
"step": 2210 + }, + { + "epoch": 1.0924255529469913, + "grad_norm": 1.2470429271312866, + "learning_rate": 9.310133342358106e-06, + "loss": 0.2684158980846405, + "step": 2211 + }, + { + "epoch": 1.0929198072408255, + "grad_norm": 1.1035267199988392, + "learning_rate": 9.301982447226145e-06, + "loss": 0.22511601448059082, + "step": 2212 + }, + { + "epoch": 1.0934140615346597, + "grad_norm": 1.165505029883992, + "learning_rate": 9.293832018080731e-06, + "loss": 0.2622867226600647, + "step": 2213 + }, + { + "epoch": 1.0939083158284937, + "grad_norm": 1.2923685951682604, + "learning_rate": 9.285682060362974e-06, + "loss": 0.3030891418457031, + "step": 2214 + }, + { + "epoch": 1.094402570122328, + "grad_norm": 1.2523210407583818, + "learning_rate": 9.277532579513666e-06, + "loss": 0.24928592145442963, + "step": 2215 + }, + { + "epoch": 1.094896824416162, + "grad_norm": 1.2048717570746186, + "learning_rate": 9.269383580973285e-06, + "loss": 0.2588339149951935, + "step": 2216 + }, + { + "epoch": 1.0953910787099963, + "grad_norm": 1.2427748942142012, + "learning_rate": 9.261235070181983e-06, + "loss": 0.2587873339653015, + "step": 2217 + }, + { + "epoch": 1.0958853330038305, + "grad_norm": 1.3192410250632676, + "learning_rate": 9.253087052579596e-06, + "loss": 0.29420971870422363, + "step": 2218 + }, + { + "epoch": 1.0963795872976647, + "grad_norm": 1.1714489078180652, + "learning_rate": 9.244939533605619e-06, + "loss": 0.25384342670440674, + "step": 2219 + }, + { + "epoch": 1.096873841591499, + "grad_norm": 1.2208998726962157, + "learning_rate": 9.236792518699224e-06, + "loss": 0.23133251070976257, + "step": 2220 + }, + { + "epoch": 1.097368095885333, + "grad_norm": 1.1919788928879418, + "learning_rate": 9.228646013299233e-06, + "loss": 0.26196008920669556, + "step": 2221 + }, + { + "epoch": 1.0978623501791671, + "grad_norm": 1.345065700534229, + "learning_rate": 9.220500022844144e-06, + "loss": 0.2567690908908844, + "step": 2222 + }, + { + "epoch": 1.0983566044730013, + 
"grad_norm": 1.1808254692787845, + "learning_rate": 9.212354552772107e-06, + "loss": 0.2555367350578308, + "step": 2223 + }, + { + "epoch": 1.0988508587668355, + "grad_norm": 1.1544608952675586, + "learning_rate": 9.204209608520913e-06, + "loss": 0.24357245862483978, + "step": 2224 + }, + { + "epoch": 1.0993451130606697, + "grad_norm": 1.3367524689374175, + "learning_rate": 9.19606519552801e-06, + "loss": 0.2792712450027466, + "step": 2225 + }, + { + "epoch": 1.099839367354504, + "grad_norm": 1.3277136329189279, + "learning_rate": 9.1879213192305e-06, + "loss": 0.29090794920921326, + "step": 2226 + }, + { + "epoch": 1.1003336216483381, + "grad_norm": 1.304360721279056, + "learning_rate": 9.179777985065115e-06, + "loss": 0.2777528762817383, + "step": 2227 + }, + { + "epoch": 1.1008278759421724, + "grad_norm": 1.1781995191131436, + "learning_rate": 9.171635198468227e-06, + "loss": 0.263868123292923, + "step": 2228 + }, + { + "epoch": 1.1013221302360063, + "grad_norm": 1.184942105326879, + "learning_rate": 9.16349296487584e-06, + "loss": 0.24118748307228088, + "step": 2229 + }, + { + "epoch": 1.1018163845298405, + "grad_norm": 1.2411255946822906, + "learning_rate": 9.155351289723603e-06, + "loss": 0.2176896631717682, + "step": 2230 + }, + { + "epoch": 1.1023106388236747, + "grad_norm": 1.3759218504425914, + "learning_rate": 9.147210178446776e-06, + "loss": 0.24727840721607208, + "step": 2231 + }, + { + "epoch": 1.102804893117509, + "grad_norm": 1.287783002848043, + "learning_rate": 9.139069636480247e-06, + "loss": 0.2711295783519745, + "step": 2232 + }, + { + "epoch": 1.1032991474113432, + "grad_norm": 1.2808604096079383, + "learning_rate": 9.130929669258525e-06, + "loss": 0.2987736165523529, + "step": 2233 + }, + { + "epoch": 1.1037934017051774, + "grad_norm": 1.3771259989337001, + "learning_rate": 9.122790282215743e-06, + "loss": 0.2773835062980652, + "step": 2234 + }, + { + "epoch": 1.1042876559990116, + "grad_norm": 1.2299830744412572, + "learning_rate": 
9.114651480785632e-06, + "loss": 0.29417523741722107, + "step": 2235 + }, + { + "epoch": 1.1047819102928456, + "grad_norm": 1.377692958442212, + "learning_rate": 9.106513270401545e-06, + "loss": 0.2642611265182495, + "step": 2236 + }, + { + "epoch": 1.1052761645866798, + "grad_norm": 1.2764125735134089, + "learning_rate": 9.098375656496434e-06, + "loss": 0.2789427638053894, + "step": 2237 + }, + { + "epoch": 1.105770418880514, + "grad_norm": 1.3238778744589295, + "learning_rate": 9.090238644502845e-06, + "loss": 0.3002237379550934, + "step": 2238 + }, + { + "epoch": 1.1062646731743482, + "grad_norm": 1.1862434874371655, + "learning_rate": 9.082102239852942e-06, + "loss": 0.27620676159858704, + "step": 2239 + }, + { + "epoch": 1.1067589274681824, + "grad_norm": 1.327009037228036, + "learning_rate": 9.07396644797847e-06, + "loss": 0.26718735694885254, + "step": 2240 + }, + { + "epoch": 1.1072531817620166, + "grad_norm": 1.3581828145326202, + "learning_rate": 9.065831274310763e-06, + "loss": 0.27443817257881165, + "step": 2241 + }, + { + "epoch": 1.1077474360558508, + "grad_norm": 1.2348189100714968, + "learning_rate": 9.057696724280748e-06, + "loss": 0.2536284923553467, + "step": 2242 + }, + { + "epoch": 1.108241690349685, + "grad_norm": 1.274876240899672, + "learning_rate": 9.049562803318942e-06, + "loss": 0.2583077549934387, + "step": 2243 + }, + { + "epoch": 1.108735944643519, + "grad_norm": 1.2591915779147578, + "learning_rate": 9.041429516855427e-06, + "loss": 0.2696278393268585, + "step": 2244 + }, + { + "epoch": 1.1092301989373532, + "grad_norm": 1.4248240108913692, + "learning_rate": 9.033296870319868e-06, + "loss": 0.2966364622116089, + "step": 2245 + }, + { + "epoch": 1.1097244532311874, + "grad_norm": 1.1050822330716321, + "learning_rate": 9.025164869141503e-06, + "loss": 0.22690679132938385, + "step": 2246 + }, + { + "epoch": 1.1102187075250216, + "grad_norm": 1.192560579016723, + "learning_rate": 9.017033518749147e-06, + "loss": 0.2777915894985199, + 
"step": 2247 + }, + { + "epoch": 1.1107129618188558, + "grad_norm": 1.3394858504136318, + "learning_rate": 9.008902824571168e-06, + "loss": 0.2890303134918213, + "step": 2248 + }, + { + "epoch": 1.11120721611269, + "grad_norm": 1.0426463189164805, + "learning_rate": 9.000772792035505e-06, + "loss": 0.22669392824172974, + "step": 2249 + }, + { + "epoch": 1.1117014704065242, + "grad_norm": 1.1970809485558533, + "learning_rate": 8.992643426569643e-06, + "loss": 0.26416563987731934, + "step": 2250 + }, + { + "epoch": 1.1121957247003584, + "grad_norm": 1.1888202892832207, + "learning_rate": 8.984514733600641e-06, + "loss": 0.2745298147201538, + "step": 2251 + }, + { + "epoch": 1.1126899789941924, + "grad_norm": 1.3798693264357922, + "learning_rate": 8.97638671855509e-06, + "loss": 0.31175684928894043, + "step": 2252 + }, + { + "epoch": 1.1131842332880266, + "grad_norm": 1.1626887122886307, + "learning_rate": 8.968259386859146e-06, + "loss": 0.2632657289505005, + "step": 2253 + }, + { + "epoch": 1.1136784875818608, + "grad_norm": 1.810662888324155, + "learning_rate": 8.960132743938485e-06, + "loss": 0.25820252299308777, + "step": 2254 + }, + { + "epoch": 1.114172741875695, + "grad_norm": 1.061521514088085, + "learning_rate": 8.95200679521835e-06, + "loss": 0.24255456030368805, + "step": 2255 + }, + { + "epoch": 1.1146669961695292, + "grad_norm": 1.2696759740581753, + "learning_rate": 8.943881546123506e-06, + "loss": 0.2973442077636719, + "step": 2256 + }, + { + "epoch": 1.1151612504633635, + "grad_norm": 1.1336353694819978, + "learning_rate": 8.935757002078252e-06, + "loss": 0.23320606350898743, + "step": 2257 + }, + { + "epoch": 1.1156555047571977, + "grad_norm": 1.275444057796017, + "learning_rate": 8.927633168506415e-06, + "loss": 0.2923268675804138, + "step": 2258 + }, + { + "epoch": 1.1161497590510319, + "grad_norm": 1.25496425665649, + "learning_rate": 8.91951005083135e-06, + "loss": 0.25932425260543823, + "step": 2259 + }, + { + "epoch": 1.1166440133448658, + 
"grad_norm": 1.2215943645090854, + "learning_rate": 8.911387654475943e-06, + "loss": 0.2631821036338806, + "step": 2260 + }, + { + "epoch": 1.1171382676387, + "grad_norm": 1.226020936236602, + "learning_rate": 8.903265984862581e-06, + "loss": 0.24741420149803162, + "step": 2261 + }, + { + "epoch": 1.1176325219325343, + "grad_norm": 1.165036984102613, + "learning_rate": 8.895145047413178e-06, + "loss": 0.2593516707420349, + "step": 2262 + }, + { + "epoch": 1.1181267762263685, + "grad_norm": 1.2132388690590856, + "learning_rate": 8.88702484754915e-06, + "loss": 0.22109609842300415, + "step": 2263 + }, + { + "epoch": 1.1186210305202027, + "grad_norm": 1.242512673005374, + "learning_rate": 8.878905390691437e-06, + "loss": 0.24363039433956146, + "step": 2264 + }, + { + "epoch": 1.1191152848140369, + "grad_norm": 1.210365574835302, + "learning_rate": 8.870786682260465e-06, + "loss": 0.2507505714893341, + "step": 2265 + }, + { + "epoch": 1.119609539107871, + "grad_norm": 1.3229609964254254, + "learning_rate": 8.86266872767617e-06, + "loss": 0.303046315908432, + "step": 2266 + }, + { + "epoch": 1.120103793401705, + "grad_norm": 1.282548473383847, + "learning_rate": 8.854551532357977e-06, + "loss": 0.257943332195282, + "step": 2267 + }, + { + "epoch": 1.1205980476955393, + "grad_norm": 1.2641740973335522, + "learning_rate": 8.84643510172482e-06, + "loss": 0.2697421610355377, + "step": 2268 + }, + { + "epoch": 1.1210923019893735, + "grad_norm": 1.126371134669409, + "learning_rate": 8.838319441195105e-06, + "loss": 0.20090234279632568, + "step": 2269 + }, + { + "epoch": 1.1215865562832077, + "grad_norm": 1.3584193930662543, + "learning_rate": 8.830204556186736e-06, + "loss": 0.2714189887046814, + "step": 2270 + }, + { + "epoch": 1.122080810577042, + "grad_norm": 1.1168786328747864, + "learning_rate": 8.822090452117084e-06, + "loss": 0.23497477173805237, + "step": 2271 + }, + { + "epoch": 1.122575064870876, + "grad_norm": 1.3047944688196833, + "learning_rate": 
8.81397713440302e-06, + "loss": 0.2582445740699768, + "step": 2272 + }, + { + "epoch": 1.1230693191647103, + "grad_norm": 1.2807794267280126, + "learning_rate": 8.805864608460876e-06, + "loss": 0.26494619250297546, + "step": 2273 + }, + { + "epoch": 1.1235635734585445, + "grad_norm": 1.3251515621500554, + "learning_rate": 8.797752879706455e-06, + "loss": 0.2767868936061859, + "step": 2274 + }, + { + "epoch": 1.1240578277523785, + "grad_norm": 1.5161646380346314, + "learning_rate": 8.789641953555032e-06, + "loss": 0.27696311473846436, + "step": 2275 + }, + { + "epoch": 1.1245520820462127, + "grad_norm": 1.3659389136687503, + "learning_rate": 8.78153183542135e-06, + "loss": 0.27048689126968384, + "step": 2276 + }, + { + "epoch": 1.125046336340047, + "grad_norm": 1.3893625373049876, + "learning_rate": 8.773422530719606e-06, + "loss": 0.2940211892127991, + "step": 2277 + }, + { + "epoch": 1.1255405906338811, + "grad_norm": 1.310212206650707, + "learning_rate": 8.765314044863453e-06, + "loss": 0.24859851598739624, + "step": 2278 + }, + { + "epoch": 1.1260348449277153, + "grad_norm": 1.3087530353150083, + "learning_rate": 8.757206383265998e-06, + "loss": 0.28879350423812866, + "step": 2279 + }, + { + "epoch": 1.1265290992215495, + "grad_norm": 1.2514534154786532, + "learning_rate": 8.74909955133981e-06, + "loss": 0.24804209172725677, + "step": 2280 + }, + { + "epoch": 1.1270233535153837, + "grad_norm": 1.3358056447173947, + "learning_rate": 8.740993554496886e-06, + "loss": 0.3199496567249298, + "step": 2281 + }, + { + "epoch": 1.1275176078092177, + "grad_norm": 2.15705729620974, + "learning_rate": 8.732888398148678e-06, + "loss": 0.3098929524421692, + "step": 2282 + }, + { + "epoch": 1.128011862103052, + "grad_norm": 1.2048730778866592, + "learning_rate": 8.724784087706067e-06, + "loss": 0.21280749142169952, + "step": 2283 + }, + { + "epoch": 1.1285061163968861, + "grad_norm": 1.1819530781050969, + "learning_rate": 8.716680628579382e-06, + "loss": 0.25330856442451477, + 
"step": 2284 + }, + { + "epoch": 1.1290003706907203, + "grad_norm": 1.2218083349938962, + "learning_rate": 8.708578026178371e-06, + "loss": 0.26141977310180664, + "step": 2285 + }, + { + "epoch": 1.1294946249845546, + "grad_norm": 1.3085311775335164, + "learning_rate": 8.700476285912219e-06, + "loss": 0.2529010772705078, + "step": 2286 + }, + { + "epoch": 1.1299888792783888, + "grad_norm": 1.4496496993285695, + "learning_rate": 8.69237541318953e-06, + "loss": 0.2662504315376282, + "step": 2287 + }, + { + "epoch": 1.130483133572223, + "grad_norm": 1.2797233255982605, + "learning_rate": 8.684275413418329e-06, + "loss": 0.2724575996398926, + "step": 2288 + }, + { + "epoch": 1.1309773878660572, + "grad_norm": 1.2524016016810007, + "learning_rate": 8.676176292006065e-06, + "loss": 0.2820962965488434, + "step": 2289 + }, + { + "epoch": 1.1314716421598914, + "grad_norm": 1.2157522787611978, + "learning_rate": 8.668078054359595e-06, + "loss": 0.2594743072986603, + "step": 2290 + }, + { + "epoch": 1.1319658964537254, + "grad_norm": 1.1017631552140204, + "learning_rate": 8.659980705885183e-06, + "loss": 0.25397709012031555, + "step": 2291 + }, + { + "epoch": 1.1324601507475596, + "grad_norm": 1.3505914192645034, + "learning_rate": 8.651884251988503e-06, + "loss": 0.27261337637901306, + "step": 2292 + }, + { + "epoch": 1.1329544050413938, + "grad_norm": 1.191460472235454, + "learning_rate": 8.643788698074638e-06, + "loss": 0.2726992070674896, + "step": 2293 + }, + { + "epoch": 1.133448659335228, + "grad_norm": 1.2175895117879216, + "learning_rate": 8.635694049548058e-06, + "loss": 0.2792774438858032, + "step": 2294 + }, + { + "epoch": 1.1339429136290622, + "grad_norm": 1.272860546351146, + "learning_rate": 8.627600311812638e-06, + "loss": 0.310885488986969, + "step": 2295 + }, + { + "epoch": 1.1344371679228964, + "grad_norm": 1.2747295027163217, + "learning_rate": 8.619507490271638e-06, + "loss": 0.27060413360595703, + "step": 2296 + }, + { + "epoch": 1.1349314222167306, + 
"grad_norm": 1.2507140444567972, + "learning_rate": 8.611415590327718e-06, + "loss": 0.27069440484046936, + "step": 2297 + }, + { + "epoch": 1.1354256765105646, + "grad_norm": 1.2299186955801236, + "learning_rate": 8.603324617382905e-06, + "loss": 0.2790459990501404, + "step": 2298 + }, + { + "epoch": 1.1359199308043988, + "grad_norm": 1.2813816772493964, + "learning_rate": 8.595234576838624e-06, + "loss": 0.27170947194099426, + "step": 2299 + }, + { + "epoch": 1.136414185098233, + "grad_norm": 1.1903279302585759, + "learning_rate": 8.587145474095665e-06, + "loss": 0.25313863158226013, + "step": 2300 + }, + { + "epoch": 1.1369084393920672, + "grad_norm": 1.2968469055543796, + "learning_rate": 8.5790573145542e-06, + "loss": 0.289467990398407, + "step": 2301 + }, + { + "epoch": 1.1374026936859014, + "grad_norm": 1.3141096348522086, + "learning_rate": 8.570970103613774e-06, + "loss": 0.29796460270881653, + "step": 2302 + }, + { + "epoch": 1.1378969479797356, + "grad_norm": 1.2855551342619271, + "learning_rate": 8.562883846673286e-06, + "loss": 0.27264270186424255, + "step": 2303 + }, + { + "epoch": 1.1383912022735698, + "grad_norm": 1.2243974310235655, + "learning_rate": 8.554798549131005e-06, + "loss": 0.3099757134914398, + "step": 2304 + }, + { + "epoch": 1.138885456567404, + "grad_norm": 1.2936181628424743, + "learning_rate": 8.546714216384565e-06, + "loss": 0.30002498626708984, + "step": 2305 + }, + { + "epoch": 1.139379710861238, + "grad_norm": 1.7617864884936485, + "learning_rate": 8.538630853830951e-06, + "loss": 0.2428818643093109, + "step": 2306 + }, + { + "epoch": 1.1398739651550722, + "grad_norm": 1.24686983002664, + "learning_rate": 8.530548466866497e-06, + "loss": 0.2601294219493866, + "step": 2307 + }, + { + "epoch": 1.1403682194489064, + "grad_norm": 1.2066765531591284, + "learning_rate": 8.522467060886888e-06, + "loss": 0.23878628015518188, + "step": 2308 + }, + { + "epoch": 1.1408624737427406, + "grad_norm": 1.345733709932402, + "learning_rate": 
8.514386641287163e-06, + "loss": 0.2780643403530121, + "step": 2309 + }, + { + "epoch": 1.1413567280365748, + "grad_norm": 1.2756115099724787, + "learning_rate": 8.506307213461689e-06, + "loss": 0.29834824800491333, + "step": 2310 + }, + { + "epoch": 1.141850982330409, + "grad_norm": 1.3376095615389103, + "learning_rate": 8.498228782804175e-06, + "loss": 0.2733996510505676, + "step": 2311 + }, + { + "epoch": 1.1423452366242433, + "grad_norm": 1.3063802509871558, + "learning_rate": 8.490151354707669e-06, + "loss": 0.2524843215942383, + "step": 2312 + }, + { + "epoch": 1.1428394909180772, + "grad_norm": 1.2776723106689647, + "learning_rate": 8.482074934564543e-06, + "loss": 0.29077857732772827, + "step": 2313 + }, + { + "epoch": 1.1433337452119114, + "grad_norm": 1.2114776729729342, + "learning_rate": 8.473999527766503e-06, + "loss": 0.25935155153274536, + "step": 2314 + }, + { + "epoch": 1.1438279995057457, + "grad_norm": 1.3166365920869918, + "learning_rate": 8.465925139704578e-06, + "loss": 0.23595012724399567, + "step": 2315 + }, + { + "epoch": 1.1443222537995799, + "grad_norm": 1.2268504419293456, + "learning_rate": 8.457851775769108e-06, + "loss": 0.25193360447883606, + "step": 2316 + }, + { + "epoch": 1.144816508093414, + "grad_norm": 1.2847886622034916, + "learning_rate": 8.449779441349755e-06, + "loss": 0.26844412088394165, + "step": 2317 + }, + { + "epoch": 1.1453107623872483, + "grad_norm": 1.2550831674884213, + "learning_rate": 8.441708141835499e-06, + "loss": 0.2507320046424866, + "step": 2318 + }, + { + "epoch": 1.1458050166810825, + "grad_norm": 1.31186920690482, + "learning_rate": 8.433637882614624e-06, + "loss": 0.2756047248840332, + "step": 2319 + }, + { + "epoch": 1.1462992709749167, + "grad_norm": 1.3818376930568548, + "learning_rate": 8.425568669074717e-06, + "loss": 0.3136482536792755, + "step": 2320 + }, + { + "epoch": 1.146793525268751, + "grad_norm": 1.3094285230006764, + "learning_rate": 8.417500506602668e-06, + "loss": 0.25975438952445984, 
+ "step": 2321 + }, + { + "epoch": 1.1472877795625849, + "grad_norm": 1.3148310008881885, + "learning_rate": 8.409433400584674e-06, + "loss": 0.2524915039539337, + "step": 2322 + }, + { + "epoch": 1.147782033856419, + "grad_norm": 1.316055955366049, + "learning_rate": 8.401367356406214e-06, + "loss": 0.2731180787086487, + "step": 2323 + }, + { + "epoch": 1.1482762881502533, + "grad_norm": 1.4277670811350172, + "learning_rate": 8.393302379452065e-06, + "loss": 0.27752095460891724, + "step": 2324 + }, + { + "epoch": 1.1487705424440875, + "grad_norm": 1.2586766809004215, + "learning_rate": 8.385238475106287e-06, + "loss": 0.269240140914917, + "step": 2325 + }, + { + "epoch": 1.1492647967379217, + "grad_norm": 1.301058586916402, + "learning_rate": 8.377175648752236e-06, + "loss": 0.2668418288230896, + "step": 2326 + }, + { + "epoch": 1.149759051031756, + "grad_norm": 1.2869179599070777, + "learning_rate": 8.369113905772532e-06, + "loss": 0.29276758432388306, + "step": 2327 + }, + { + "epoch": 1.15025330532559, + "grad_norm": 1.240170388592341, + "learning_rate": 8.361053251549083e-06, + "loss": 0.26562872529029846, + "step": 2328 + }, + { + "epoch": 1.150747559619424, + "grad_norm": 1.2907483203574122, + "learning_rate": 8.352993691463063e-06, + "loss": 0.257779061794281, + "step": 2329 + }, + { + "epoch": 1.1512418139132583, + "grad_norm": 1.3761256870332743, + "learning_rate": 8.344935230894926e-06, + "loss": 0.2871868312358856, + "step": 2330 + }, + { + "epoch": 1.1517360682070925, + "grad_norm": 1.2766304490065612, + "learning_rate": 8.336877875224379e-06, + "loss": 0.25191348791122437, + "step": 2331 + }, + { + "epoch": 1.1522303225009267, + "grad_norm": 1.1532415542893881, + "learning_rate": 8.3288216298304e-06, + "loss": 0.27057239413261414, + "step": 2332 + }, + { + "epoch": 1.152724576794761, + "grad_norm": 1.3903855220327628, + "learning_rate": 8.32076650009122e-06, + "loss": 0.31574326753616333, + "step": 2333 + }, + { + "epoch": 1.1532188310885951, + 
"grad_norm": 1.5549371484345924, + "learning_rate": 8.312712491384332e-06, + "loss": 0.22503693401813507, + "step": 2334 + }, + { + "epoch": 1.1537130853824293, + "grad_norm": 1.2363735263099107, + "learning_rate": 8.304659609086478e-06, + "loss": 0.25754863023757935, + "step": 2335 + }, + { + "epoch": 1.1542073396762635, + "grad_norm": 1.1790773293013888, + "learning_rate": 8.296607858573646e-06, + "loss": 0.24367934465408325, + "step": 2336 + }, + { + "epoch": 1.1547015939700975, + "grad_norm": 1.2792693896599328, + "learning_rate": 8.288557245221068e-06, + "loss": 0.28907084465026855, + "step": 2337 + }, + { + "epoch": 1.1551958482639317, + "grad_norm": 1.1852966795691644, + "learning_rate": 8.280507774403217e-06, + "loss": 0.24526283144950867, + "step": 2338 + }, + { + "epoch": 1.155690102557766, + "grad_norm": 1.1473094958169556, + "learning_rate": 8.272459451493811e-06, + "loss": 0.21968787908554077, + "step": 2339 + }, + { + "epoch": 1.1561843568516001, + "grad_norm": 1.1613080622383485, + "learning_rate": 8.264412281865791e-06, + "loss": 0.23803061246871948, + "step": 2340 + }, + { + "epoch": 1.1566786111454344, + "grad_norm": 1.2818425210270699, + "learning_rate": 8.256366270891335e-06, + "loss": 0.25715917348861694, + "step": 2341 + }, + { + "epoch": 1.1571728654392686, + "grad_norm": 1.300939575113673, + "learning_rate": 8.248321423941836e-06, + "loss": 0.29443520307540894, + "step": 2342 + }, + { + "epoch": 1.1576671197331028, + "grad_norm": 1.2224332053171705, + "learning_rate": 8.240277746387934e-06, + "loss": 0.24904949963092804, + "step": 2343 + }, + { + "epoch": 1.1581613740269368, + "grad_norm": 1.2866663921835886, + "learning_rate": 8.23223524359946e-06, + "loss": 0.2594628632068634, + "step": 2344 + }, + { + "epoch": 1.158655628320771, + "grad_norm": 1.2731058113968243, + "learning_rate": 8.224193920945482e-06, + "loss": 0.23853302001953125, + "step": 2345 + }, + { + "epoch": 1.1591498826146052, + "grad_norm": 1.3394742959570003, + 
"learning_rate": 8.216153783794266e-06, + "loss": 0.25465112924575806, + "step": 2346 + }, + { + "epoch": 1.1596441369084394, + "grad_norm": 1.3135301213887383, + "learning_rate": 8.208114837513297e-06, + "loss": 0.28038230538368225, + "step": 2347 + }, + { + "epoch": 1.1601383912022736, + "grad_norm": 1.350685866794537, + "learning_rate": 8.200077087469262e-06, + "loss": 0.3144591450691223, + "step": 2348 + }, + { + "epoch": 1.1606326454961078, + "grad_norm": 1.281224607522297, + "learning_rate": 8.192040539028047e-06, + "loss": 0.25782787799835205, + "step": 2349 + }, + { + "epoch": 1.161126899789942, + "grad_norm": 1.5124699254380607, + "learning_rate": 8.18400519755473e-06, + "loss": 0.21928566694259644, + "step": 2350 + }, + { + "epoch": 1.1616211540837762, + "grad_norm": 1.2617101773123074, + "learning_rate": 8.175971068413598e-06, + "loss": 0.2277221381664276, + "step": 2351 + }, + { + "epoch": 1.1621154083776104, + "grad_norm": 1.3465952359588251, + "learning_rate": 8.16793815696812e-06, + "loss": 0.26971378922462463, + "step": 2352 + }, + { + "epoch": 1.1626096626714444, + "grad_norm": 1.352802202139023, + "learning_rate": 8.15990646858095e-06, + "loss": 0.26448535919189453, + "step": 2353 + }, + { + "epoch": 1.1631039169652786, + "grad_norm": 1.3091049684475664, + "learning_rate": 8.151876008613927e-06, + "loss": 0.26372095942497253, + "step": 2354 + }, + { + "epoch": 1.1635981712591128, + "grad_norm": 1.3450938198850664, + "learning_rate": 8.143846782428078e-06, + "loss": 0.2594243288040161, + "step": 2355 + }, + { + "epoch": 1.164092425552947, + "grad_norm": 1.2377171543356333, + "learning_rate": 8.135818795383597e-06, + "loss": 0.23994986712932587, + "step": 2356 + }, + { + "epoch": 1.1645866798467812, + "grad_norm": 1.2983017697862052, + "learning_rate": 8.12779205283985e-06, + "loss": 0.2746032476425171, + "step": 2357 + }, + { + "epoch": 1.1650809341406154, + "grad_norm": 1.3938993958898265, + "learning_rate": 8.119766560155377e-06, + "loss": 
0.3323846161365509, + "step": 2358 + }, + { + "epoch": 1.1655751884344494, + "grad_norm": 1.3890076094482564, + "learning_rate": 8.111742322687886e-06, + "loss": 0.28155508637428284, + "step": 2359 + }, + { + "epoch": 1.1660694427282836, + "grad_norm": 1.361844276882708, + "learning_rate": 8.103719345794237e-06, + "loss": 0.2936748266220093, + "step": 2360 + }, + { + "epoch": 1.1665636970221178, + "grad_norm": 1.2168650482731003, + "learning_rate": 8.095697634830463e-06, + "loss": 0.23575282096862793, + "step": 2361 + }, + { + "epoch": 1.167057951315952, + "grad_norm": 1.277845029620416, + "learning_rate": 8.087677195151737e-06, + "loss": 0.24547496438026428, + "step": 2362 + }, + { + "epoch": 1.1675522056097862, + "grad_norm": 1.3371291006512767, + "learning_rate": 8.079658032112388e-06, + "loss": 0.2936372458934784, + "step": 2363 + }, + { + "epoch": 1.1680464599036204, + "grad_norm": 1.316297337509115, + "learning_rate": 8.071640151065902e-06, + "loss": 0.28602418303489685, + "step": 2364 + }, + { + "epoch": 1.1685407141974546, + "grad_norm": 1.271542457187923, + "learning_rate": 8.0636235573649e-06, + "loss": 0.2742761969566345, + "step": 2365 + }, + { + "epoch": 1.1690349684912889, + "grad_norm": 1.2379702024007857, + "learning_rate": 8.05560825636114e-06, + "loss": 0.2590268552303314, + "step": 2366 + }, + { + "epoch": 1.169529222785123, + "grad_norm": 1.2195835846594238, + "learning_rate": 8.047594253405525e-06, + "loss": 0.26881399750709534, + "step": 2367 + }, + { + "epoch": 1.170023477078957, + "grad_norm": 1.279205613064969, + "learning_rate": 8.039581553848093e-06, + "loss": 0.27069953083992004, + "step": 2368 + }, + { + "epoch": 1.1705177313727912, + "grad_norm": 1.1650094541250327, + "learning_rate": 8.031570163038005e-06, + "loss": 0.27320611476898193, + "step": 2369 + }, + { + "epoch": 1.1710119856666255, + "grad_norm": 1.289507742767465, + "learning_rate": 8.023560086323548e-06, + "loss": 0.26400327682495117, + "step": 2370 + }, + { + "epoch": 
1.1715062399604597, + "grad_norm": 1.1403608861276666, + "learning_rate": 8.015551329052136e-06, + "loss": 0.22287744283676147, + "step": 2371 + }, + { + "epoch": 1.1720004942542939, + "grad_norm": 1.2409841787965832, + "learning_rate": 8.007543896570309e-06, + "loss": 0.28240424394607544, + "step": 2372 + }, + { + "epoch": 1.172494748548128, + "grad_norm": 1.3414402473623117, + "learning_rate": 7.999537794223702e-06, + "loss": 0.27119147777557373, + "step": 2373 + }, + { + "epoch": 1.1729890028419623, + "grad_norm": 1.376418134177551, + "learning_rate": 7.991533027357085e-06, + "loss": 0.2579900920391083, + "step": 2374 + }, + { + "epoch": 1.1734832571357963, + "grad_norm": 1.197547817498857, + "learning_rate": 7.983529601314317e-06, + "loss": 0.25550374388694763, + "step": 2375 + }, + { + "epoch": 1.1739775114296305, + "grad_norm": 1.119102387270249, + "learning_rate": 7.97552752143838e-06, + "loss": 0.21197429299354553, + "step": 2376 + }, + { + "epoch": 1.1744717657234647, + "grad_norm": 1.245296460371477, + "learning_rate": 7.96752679307134e-06, + "loss": 0.28724029660224915, + "step": 2377 + }, + { + "epoch": 1.1749660200172989, + "grad_norm": 1.119081251981291, + "learning_rate": 7.959527421554375e-06, + "loss": 0.24320468306541443, + "step": 2378 + }, + { + "epoch": 1.175460274311133, + "grad_norm": 1.1094352642608503, + "learning_rate": 7.951529412227745e-06, + "loss": 0.22487501800060272, + "step": 2379 + }, + { + "epoch": 1.1759545286049673, + "grad_norm": 1.1424975538486684, + "learning_rate": 7.943532770430811e-06, + "loss": 0.2754969894886017, + "step": 2380 + }, + { + "epoch": 1.1764487828988015, + "grad_norm": 1.2424832323819373, + "learning_rate": 7.93553750150202e-06, + "loss": 0.2734825909137726, + "step": 2381 + }, + { + "epoch": 1.1769430371926357, + "grad_norm": 1.3311172796502668, + "learning_rate": 7.927543610778895e-06, + "loss": 0.2803332209587097, + "step": 2382 + }, + { + "epoch": 1.1774372914864697, + "grad_norm": 1.3572589379934268, + 
"learning_rate": 7.919551103598037e-06, + "loss": 0.2820316255092621, + "step": 2383 + }, + { + "epoch": 1.177931545780304, + "grad_norm": 1.1984541262238777, + "learning_rate": 7.911559985295142e-06, + "loss": 0.26788315176963806, + "step": 2384 + }, + { + "epoch": 1.178425800074138, + "grad_norm": 1.152974420484647, + "learning_rate": 7.90357026120496e-06, + "loss": 0.2562825083732605, + "step": 2385 + }, + { + "epoch": 1.1789200543679723, + "grad_norm": 1.3733272776027918, + "learning_rate": 7.895581936661316e-06, + "loss": 0.28260675072669983, + "step": 2386 + }, + { + "epoch": 1.1794143086618065, + "grad_norm": 1.2509507258139472, + "learning_rate": 7.887595016997105e-06, + "loss": 0.25887200236320496, + "step": 2387 + }, + { + "epoch": 1.1799085629556407, + "grad_norm": 1.1852436756934879, + "learning_rate": 7.879609507544274e-06, + "loss": 0.2351648062467575, + "step": 2388 + }, + { + "epoch": 1.180402817249475, + "grad_norm": 1.310528017980178, + "learning_rate": 7.871625413633843e-06, + "loss": 0.2958889305591583, + "step": 2389 + }, + { + "epoch": 1.180897071543309, + "grad_norm": 1.260660594043313, + "learning_rate": 7.863642740595873e-06, + "loss": 0.29704710841178894, + "step": 2390 + }, + { + "epoch": 1.1813913258371431, + "grad_norm": 1.1273593973839822, + "learning_rate": 7.855661493759488e-06, + "loss": 0.23283210396766663, + "step": 2391 + }, + { + "epoch": 1.1818855801309773, + "grad_norm": 1.1497387573049556, + "learning_rate": 7.847681678452846e-06, + "loss": 0.22818870842456818, + "step": 2392 + }, + { + "epoch": 1.1823798344248115, + "grad_norm": 1.2334848445567106, + "learning_rate": 7.839703300003163e-06, + "loss": 0.2345077246427536, + "step": 2393 + }, + { + "epoch": 1.1828740887186457, + "grad_norm": 1.3979127898652413, + "learning_rate": 7.831726363736694e-06, + "loss": 0.31161409616470337, + "step": 2394 + }, + { + "epoch": 1.18336834301248, + "grad_norm": 1.3157666615230723, + "learning_rate": 7.823750874978724e-06, + "loss": 
0.2958439588546753, + "step": 2395 + }, + { + "epoch": 1.1838625973063142, + "grad_norm": 1.1914805532137183, + "learning_rate": 7.815776839053568e-06, + "loss": 0.24895446002483368, + "step": 2396 + }, + { + "epoch": 1.1843568516001484, + "grad_norm": 1.189611866561264, + "learning_rate": 7.807804261284591e-06, + "loss": 0.2691795825958252, + "step": 2397 + }, + { + "epoch": 1.1848511058939826, + "grad_norm": 1.2282823509277643, + "learning_rate": 7.799833146994165e-06, + "loss": 0.26797783374786377, + "step": 2398 + }, + { + "epoch": 1.1853453601878166, + "grad_norm": 1.2297499766268158, + "learning_rate": 7.791863501503694e-06, + "loss": 0.2665610611438751, + "step": 2399 + }, + { + "epoch": 1.1858396144816508, + "grad_norm": 1.1290863581864232, + "learning_rate": 7.783895330133596e-06, + "loss": 0.24712792038917542, + "step": 2400 + }, + { + "epoch": 1.186333868775485, + "grad_norm": 1.2300895404986125, + "learning_rate": 7.775928638203316e-06, + "loss": 0.24131645262241364, + "step": 2401 + }, + { + "epoch": 1.1868281230693192, + "grad_norm": 1.2566198414342145, + "learning_rate": 7.7679634310313e-06, + "loss": 0.24233923852443695, + "step": 2402 + }, + { + "epoch": 1.1873223773631534, + "grad_norm": 1.2397915401139883, + "learning_rate": 7.759999713935002e-06, + "loss": 0.24929150938987732, + "step": 2403 + }, + { + "epoch": 1.1878166316569876, + "grad_norm": 1.2005274695814647, + "learning_rate": 7.752037492230887e-06, + "loss": 0.266767293214798, + "step": 2404 + }, + { + "epoch": 1.1883108859508218, + "grad_norm": 1.2083997342227277, + "learning_rate": 7.744076771234427e-06, + "loss": 0.257263720035553, + "step": 2405 + }, + { + "epoch": 1.1888051402446558, + "grad_norm": 1.3017758985808945, + "learning_rate": 7.73611755626008e-06, + "loss": 0.26949891448020935, + "step": 2406 + }, + { + "epoch": 1.18929939453849, + "grad_norm": 1.3523825920294412, + "learning_rate": 7.728159852621308e-06, + "loss": 0.250274121761322, + "step": 2407 + }, + { + "epoch": 
1.1897936488323242, + "grad_norm": 1.6370645689880403, + "learning_rate": 7.720203665630553e-06, + "loss": 0.2442864030599594, + "step": 2408 + }, + { + "epoch": 1.1902879031261584, + "grad_norm": 1.4258170868908235, + "learning_rate": 7.71224900059926e-06, + "loss": 0.273416131734848, + "step": 2409 + }, + { + "epoch": 1.1907821574199926, + "grad_norm": 1.2547538223250059, + "learning_rate": 7.704295862837845e-06, + "loss": 0.2559645175933838, + "step": 2410 + }, + { + "epoch": 1.1912764117138268, + "grad_norm": 1.3439078919148493, + "learning_rate": 7.696344257655713e-06, + "loss": 0.2793371379375458, + "step": 2411 + }, + { + "epoch": 1.191770666007661, + "grad_norm": 1.1661216324600743, + "learning_rate": 7.688394190361235e-06, + "loss": 0.23739437758922577, + "step": 2412 + }, + { + "epoch": 1.1922649203014952, + "grad_norm": 1.293132062594429, + "learning_rate": 7.680445666261766e-06, + "loss": 0.27027466893196106, + "step": 2413 + }, + { + "epoch": 1.1927591745953292, + "grad_norm": 1.2887121644516222, + "learning_rate": 7.672498690663632e-06, + "loss": 0.2641778886318207, + "step": 2414 + }, + { + "epoch": 1.1932534288891634, + "grad_norm": 1.235898023301149, + "learning_rate": 7.664553268872116e-06, + "loss": 0.25086820125579834, + "step": 2415 + }, + { + "epoch": 1.1937476831829976, + "grad_norm": 1.6761712741491541, + "learning_rate": 7.656609406191467e-06, + "loss": 0.2871254086494446, + "step": 2416 + }, + { + "epoch": 1.1942419374768318, + "grad_norm": 1.193500770631568, + "learning_rate": 7.648667107924893e-06, + "loss": 0.2657528221607208, + "step": 2417 + }, + { + "epoch": 1.194736191770666, + "grad_norm": 1.3739698225148846, + "learning_rate": 7.640726379374564e-06, + "loss": 0.26942694187164307, + "step": 2418 + }, + { + "epoch": 1.1952304460645002, + "grad_norm": 1.1561137180130854, + "learning_rate": 7.632787225841593e-06, + "loss": 0.23883840441703796, + "step": 2419 + }, + { + "epoch": 1.1957247003583344, + "grad_norm": 1.215726770348901, + 
"learning_rate": 7.624849652626049e-06, + "loss": 0.24837304651737213, + "step": 2420 + }, + { + "epoch": 1.1962189546521684, + "grad_norm": 1.194954932679119, + "learning_rate": 7.616913665026936e-06, + "loss": 0.2882450222969055, + "step": 2421 + }, + { + "epoch": 1.1967132089460026, + "grad_norm": 1.4557191034476904, + "learning_rate": 7.608979268342213e-06, + "loss": 0.25877460837364197, + "step": 2422 + }, + { + "epoch": 1.1972074632398368, + "grad_norm": 1.2343724838571453, + "learning_rate": 7.601046467868767e-06, + "loss": 0.26970750093460083, + "step": 2423 + }, + { + "epoch": 1.197701717533671, + "grad_norm": 1.1598747816375319, + "learning_rate": 7.593115268902423e-06, + "loss": 0.23771706223487854, + "step": 2424 + }, + { + "epoch": 1.1981959718275053, + "grad_norm": 1.1949187968831856, + "learning_rate": 7.585185676737932e-06, + "loss": 0.25420787930488586, + "step": 2425 + }, + { + "epoch": 1.1986902261213395, + "grad_norm": 1.248194263596005, + "learning_rate": 7.577257696668982e-06, + "loss": 0.2551025152206421, + "step": 2426 + }, + { + "epoch": 1.1991844804151737, + "grad_norm": 1.1913659485965633, + "learning_rate": 7.569331333988177e-06, + "loss": 0.2302972972393036, + "step": 2427 + }, + { + "epoch": 1.1996787347090079, + "grad_norm": 1.340176223566515, + "learning_rate": 7.561406593987045e-06, + "loss": 0.25811445713043213, + "step": 2428 + }, + { + "epoch": 1.200172989002842, + "grad_norm": 1.1946803554276415, + "learning_rate": 7.5534834819560235e-06, + "loss": 0.2550782561302185, + "step": 2429 + }, + { + "epoch": 1.200667243296676, + "grad_norm": 1.3588122473637638, + "learning_rate": 7.545562003184474e-06, + "loss": 0.24825535714626312, + "step": 2430 + }, + { + "epoch": 1.2011614975905103, + "grad_norm": 1.3105140055807547, + "learning_rate": 7.537642162960664e-06, + "loss": 0.29703712463378906, + "step": 2431 + }, + { + "epoch": 1.2016557518843445, + "grad_norm": 1.2707072551305245, + "learning_rate": 7.5297239665717625e-06, + "loss": 
0.26830747723579407, + "step": 2432 + }, + { + "epoch": 1.2021500061781787, + "grad_norm": 1.2272388404108225, + "learning_rate": 7.521807419303846e-06, + "loss": 0.2428341656923294, + "step": 2433 + }, + { + "epoch": 1.202644260472013, + "grad_norm": 1.3310573803274635, + "learning_rate": 7.513892526441883e-06, + "loss": 0.2843051552772522, + "step": 2434 + }, + { + "epoch": 1.203138514765847, + "grad_norm": 1.297091941411815, + "learning_rate": 7.50597929326975e-06, + "loss": 0.2485228031873703, + "step": 2435 + }, + { + "epoch": 1.203632769059681, + "grad_norm": 1.3716686006321661, + "learning_rate": 7.498067725070206e-06, + "loss": 0.25343626737594604, + "step": 2436 + }, + { + "epoch": 1.2041270233535153, + "grad_norm": 1.3197919626781558, + "learning_rate": 7.490157827124902e-06, + "loss": 0.24906575679779053, + "step": 2437 + }, + { + "epoch": 1.2046212776473495, + "grad_norm": 1.6398204697926184, + "learning_rate": 7.4822496047143665e-06, + "loss": 0.33576443791389465, + "step": 2438 + }, + { + "epoch": 1.2051155319411837, + "grad_norm": 1.341601959864184, + "learning_rate": 7.474343063118023e-06, + "loss": 0.2755683362483978, + "step": 2439 + }, + { + "epoch": 1.205609786235018, + "grad_norm": 1.259839098151577, + "learning_rate": 7.466438207614165e-06, + "loss": 0.2667745351791382, + "step": 2440 + }, + { + "epoch": 1.2061040405288521, + "grad_norm": 1.3942381323272646, + "learning_rate": 7.458535043479959e-06, + "loss": 0.2970271408557892, + "step": 2441 + }, + { + "epoch": 1.2065982948226863, + "grad_norm": 1.2934031608191798, + "learning_rate": 7.450633575991442e-06, + "loss": 0.2628048360347748, + "step": 2442 + }, + { + "epoch": 1.2070925491165205, + "grad_norm": 1.3935428467061275, + "learning_rate": 7.442733810423526e-06, + "loss": 0.29923003911972046, + "step": 2443 + }, + { + "epoch": 1.2075868034103547, + "grad_norm": 1.2121764987473183, + "learning_rate": 7.4348357520499805e-06, + "loss": 0.2486419975757599, + "step": 2444 + }, + { + "epoch": 
1.2080810577041887, + "grad_norm": 1.2651423288599317, + "learning_rate": 7.4269394061434315e-06, + "loss": 0.2711118459701538, + "step": 2445 + }, + { + "epoch": 1.208575311998023, + "grad_norm": 1.2689988235231109, + "learning_rate": 7.419044777975371e-06, + "loss": 0.2568815052509308, + "step": 2446 + }, + { + "epoch": 1.2090695662918571, + "grad_norm": 1.3357220203112758, + "learning_rate": 7.411151872816143e-06, + "loss": 0.2546462416648865, + "step": 2447 + }, + { + "epoch": 1.2095638205856913, + "grad_norm": 1.1716595202066384, + "learning_rate": 7.403260695934933e-06, + "loss": 0.23455393314361572, + "step": 2448 + }, + { + "epoch": 1.2100580748795255, + "grad_norm": 1.3263077198790523, + "learning_rate": 7.395371252599779e-06, + "loss": 0.2874235510826111, + "step": 2449 + }, + { + "epoch": 1.2105523291733598, + "grad_norm": 1.2319732877340805, + "learning_rate": 7.387483548077559e-06, + "loss": 0.2462289184331894, + "step": 2450 + }, + { + "epoch": 1.211046583467194, + "grad_norm": 1.381045021384348, + "learning_rate": 7.379597587633998e-06, + "loss": 0.29385364055633545, + "step": 2451 + }, + { + "epoch": 1.211540837761028, + "grad_norm": 1.1902133906710186, + "learning_rate": 7.371713376533642e-06, + "loss": 0.25049760937690735, + "step": 2452 + }, + { + "epoch": 1.2120350920548622, + "grad_norm": 1.267298470174844, + "learning_rate": 7.363830920039887e-06, + "loss": 0.2748974859714508, + "step": 2453 + }, + { + "epoch": 1.2125293463486964, + "grad_norm": 1.2929931198793703, + "learning_rate": 7.355950223414939e-06, + "loss": 0.2707570791244507, + "step": 2454 + }, + { + "epoch": 1.2130236006425306, + "grad_norm": 1.3328464163268134, + "learning_rate": 7.3480712919198474e-06, + "loss": 0.2864024043083191, + "step": 2455 + }, + { + "epoch": 1.2135178549363648, + "grad_norm": 1.400259353784304, + "learning_rate": 7.340194130814466e-06, + "loss": 0.3181900680065155, + "step": 2456 + }, + { + "epoch": 1.214012109230199, + "grad_norm": 1.2994892273470056, + 
"learning_rate": 7.332318745357483e-06, + "loss": 0.3022974729537964, + "step": 2457 + }, + { + "epoch": 1.2145063635240332, + "grad_norm": 1.2350650698265369, + "learning_rate": 7.324445140806387e-06, + "loss": 0.2850461006164551, + "step": 2458 + }, + { + "epoch": 1.2150006178178674, + "grad_norm": 1.0534315857750147, + "learning_rate": 7.316573322417483e-06, + "loss": 0.21958643198013306, + "step": 2459 + }, + { + "epoch": 1.2154948721117014, + "grad_norm": 1.3531472648001939, + "learning_rate": 7.3087032954458915e-06, + "loss": 0.2517468333244324, + "step": 2460 + }, + { + "epoch": 1.2159891264055356, + "grad_norm": 1.1714370722498957, + "learning_rate": 7.300835065145526e-06, + "loss": 0.26957637071609497, + "step": 2461 + }, + { + "epoch": 1.2164833806993698, + "grad_norm": 1.2755586367674554, + "learning_rate": 7.292968636769103e-06, + "loss": 0.2699058949947357, + "step": 2462 + }, + { + "epoch": 1.216977634993204, + "grad_norm": 1.2382912705778586, + "learning_rate": 7.285104015568138e-06, + "loss": 0.25076431035995483, + "step": 2463 + }, + { + "epoch": 1.2174718892870382, + "grad_norm": 1.2104527847150177, + "learning_rate": 7.277241206792944e-06, + "loss": 0.24862724542617798, + "step": 2464 + }, + { + "epoch": 1.2179661435808724, + "grad_norm": 1.3107261919810722, + "learning_rate": 7.269380215692614e-06, + "loss": 0.27427712082862854, + "step": 2465 + }, + { + "epoch": 1.2184603978747066, + "grad_norm": 1.2946586839730188, + "learning_rate": 7.261521047515041e-06, + "loss": 0.24343061447143555, + "step": 2466 + }, + { + "epoch": 1.2189546521685406, + "grad_norm": 1.1968860231182823, + "learning_rate": 7.253663707506882e-06, + "loss": 0.25482866168022156, + "step": 2467 + }, + { + "epoch": 1.2194489064623748, + "grad_norm": 1.2806570256332481, + "learning_rate": 7.2458082009135964e-06, + "loss": 0.27699458599090576, + "step": 2468 + }, + { + "epoch": 1.219943160756209, + "grad_norm": 1.3000686730507884, + "learning_rate": 7.237954532979401e-06, + 
"loss": 0.26576149463653564, + "step": 2469 + }, + { + "epoch": 1.2204374150500432, + "grad_norm": 1.2984838025251157, + "learning_rate": 7.230102708947298e-06, + "loss": 0.287861168384552, + "step": 2470 + }, + { + "epoch": 1.2209316693438774, + "grad_norm": 1.2911534198412806, + "learning_rate": 7.2222527340590434e-06, + "loss": 0.25484874844551086, + "step": 2471 + }, + { + "epoch": 1.2214259236377116, + "grad_norm": 1.284847349415858, + "learning_rate": 7.214404613555177e-06, + "loss": 0.26371529698371887, + "step": 2472 + }, + { + "epoch": 1.2219201779315458, + "grad_norm": 1.334957534550205, + "learning_rate": 7.206558352674992e-06, + "loss": 0.23692578077316284, + "step": 2473 + }, + { + "epoch": 1.22241443222538, + "grad_norm": 1.2696744902236006, + "learning_rate": 7.198713956656538e-06, + "loss": 0.26369085907936096, + "step": 2474 + }, + { + "epoch": 1.2229086865192142, + "grad_norm": 1.4374683516439322, + "learning_rate": 7.1908714307366145e-06, + "loss": 0.260580450296402, + "step": 2475 + }, + { + "epoch": 1.2234029408130482, + "grad_norm": 1.280804641850837, + "learning_rate": 7.1830307801507904e-06, + "loss": 0.2693007290363312, + "step": 2476 + }, + { + "epoch": 1.2238971951068824, + "grad_norm": 1.3429546136121409, + "learning_rate": 7.1751920101333695e-06, + "loss": 0.26629775762557983, + "step": 2477 + }, + { + "epoch": 1.2243914494007166, + "grad_norm": 1.3999841706301799, + "learning_rate": 7.167355125917399e-06, + "loss": 0.2963234782218933, + "step": 2478 + }, + { + "epoch": 1.2248857036945509, + "grad_norm": 1.2332551275962955, + "learning_rate": 7.159520132734669e-06, + "loss": 0.24415187537670135, + "step": 2479 + }, + { + "epoch": 1.225379957988385, + "grad_norm": 1.3645078601677985, + "learning_rate": 7.15168703581572e-06, + "loss": 0.2941599190235138, + "step": 2480 + }, + { + "epoch": 1.2258742122822193, + "grad_norm": 1.2551885597461083, + "learning_rate": 7.1438558403898065e-06, + "loss": 0.22807514667510986, + "step": 2481 + }, + { 
+ "epoch": 1.2263684665760535, + "grad_norm": 1.3774209397395383, + "learning_rate": 7.136026551684923e-06, + "loss": 0.28865426778793335, + "step": 2482 + }, + { + "epoch": 1.2268627208698875, + "grad_norm": 1.3250195381886638, + "learning_rate": 7.1281991749277945e-06, + "loss": 0.3015780448913574, + "step": 2483 + }, + { + "epoch": 1.2273569751637217, + "grad_norm": 1.30264219696165, + "learning_rate": 7.12037371534386e-06, + "loss": 0.2521517872810364, + "step": 2484 + }, + { + "epoch": 1.2278512294575559, + "grad_norm": 1.520486974517902, + "learning_rate": 7.1125501781572896e-06, + "loss": 0.2904277443885803, + "step": 2485 + }, + { + "epoch": 1.22834548375139, + "grad_norm": 1.2434155494713983, + "learning_rate": 7.104728568590966e-06, + "loss": 0.26172375679016113, + "step": 2486 + }, + { + "epoch": 1.2288397380452243, + "grad_norm": 1.3588693705399504, + "learning_rate": 7.096908891866483e-06, + "loss": 0.23565448820590973, + "step": 2487 + }, + { + "epoch": 1.2293339923390585, + "grad_norm": 1.276833588621656, + "learning_rate": 7.0890911532041375e-06, + "loss": 0.2550106644630432, + "step": 2488 + }, + { + "epoch": 1.2298282466328927, + "grad_norm": 1.4167484141197517, + "learning_rate": 7.08127535782295e-06, + "loss": 0.3221823573112488, + "step": 2489 + }, + { + "epoch": 1.230322500926727, + "grad_norm": 1.2657124525427264, + "learning_rate": 7.073461510940631e-06, + "loss": 0.26209163665771484, + "step": 2490 + }, + { + "epoch": 1.2308167552205609, + "grad_norm": 1.3626305998908985, + "learning_rate": 7.06564961777359e-06, + "loss": 0.28635868430137634, + "step": 2491 + }, + { + "epoch": 1.231311009514395, + "grad_norm": 1.417027138446056, + "learning_rate": 7.0578396835369355e-06, + "loss": 0.25630202889442444, + "step": 2492 + }, + { + "epoch": 1.2318052638082293, + "grad_norm": 1.233621488661494, + "learning_rate": 7.050031713444474e-06, + "loss": 0.27345454692840576, + "step": 2493 + }, + { + "epoch": 1.2322995181020635, + "grad_norm": 
1.2592068756906736, + "learning_rate": 7.042225712708692e-06, + "loss": 0.2365841269493103, + "step": 2494 + }, + { + "epoch": 1.2327937723958977, + "grad_norm": 1.730933189967813, + "learning_rate": 7.03442168654076e-06, + "loss": 0.2891104221343994, + "step": 2495 + }, + { + "epoch": 1.233288026689732, + "grad_norm": 1.3811266669598459, + "learning_rate": 7.026619640150534e-06, + "loss": 0.2713435888290405, + "step": 2496 + }, + { + "epoch": 1.2337822809835661, + "grad_norm": 1.3509192768016722, + "learning_rate": 7.018819578746557e-06, + "loss": 0.28552842140197754, + "step": 2497 + }, + { + "epoch": 1.2342765352774, + "grad_norm": 1.377186562637688, + "learning_rate": 7.011021507536031e-06, + "loss": 0.2731080949306488, + "step": 2498 + }, + { + "epoch": 1.2347707895712343, + "grad_norm": 1.1800591795719682, + "learning_rate": 7.003225431724841e-06, + "loss": 0.27373206615448, + "step": 2499 + }, + { + "epoch": 1.2352650438650685, + "grad_norm": 1.3197536250384188, + "learning_rate": 6.99543135651753e-06, + "loss": 0.24507245421409607, + "step": 2500 + }, + { + "epoch": 1.2357592981589027, + "grad_norm": 1.2680812543691635, + "learning_rate": 6.9876392871173205e-06, + "loss": 0.2653801739215851, + "step": 2501 + }, + { + "epoch": 1.236253552452737, + "grad_norm": 1.115227060544212, + "learning_rate": 6.979849228726079e-06, + "loss": 0.1929643303155899, + "step": 2502 + }, + { + "epoch": 1.2367478067465711, + "grad_norm": 1.330653204132735, + "learning_rate": 6.972061186544341e-06, + "loss": 0.2684918940067291, + "step": 2503 + }, + { + "epoch": 1.2372420610404053, + "grad_norm": 1.2129572179563677, + "learning_rate": 6.964275165771288e-06, + "loss": 0.23158729076385498, + "step": 2504 + }, + { + "epoch": 1.2377363153342396, + "grad_norm": 1.3192284190451669, + "learning_rate": 6.95649117160476e-06, + "loss": 0.24757611751556396, + "step": 2505 + }, + { + "epoch": 1.2382305696280738, + "grad_norm": 1.328208985585749, + "learning_rate": 6.9487092092412425e-06, + 
"loss": 0.2651844620704651, + "step": 2506 + }, + { + "epoch": 1.2387248239219077, + "grad_norm": 1.3550284074069674, + "learning_rate": 6.940929283875859e-06, + "loss": 0.26745620369911194, + "step": 2507 + }, + { + "epoch": 1.239219078215742, + "grad_norm": 1.2361002758783033, + "learning_rate": 6.933151400702374e-06, + "loss": 0.22088846564292908, + "step": 2508 + }, + { + "epoch": 1.2397133325095762, + "grad_norm": 1.2379679284464757, + "learning_rate": 6.925375564913193e-06, + "loss": 0.2662886381149292, + "step": 2509 + }, + { + "epoch": 1.2402075868034104, + "grad_norm": 1.3634625495618726, + "learning_rate": 6.917601781699357e-06, + "loss": 0.2691834270954132, + "step": 2510 + }, + { + "epoch": 1.2407018410972446, + "grad_norm": 1.1575744185130052, + "learning_rate": 6.909830056250527e-06, + "loss": 0.2110689878463745, + "step": 2511 + }, + { + "epoch": 1.2411960953910788, + "grad_norm": 1.2961548823459923, + "learning_rate": 6.902060393755001e-06, + "loss": 0.29281991720199585, + "step": 2512 + }, + { + "epoch": 1.2416903496849128, + "grad_norm": 1.2724295845366205, + "learning_rate": 6.894292799399688e-06, + "loss": 0.27409040927886963, + "step": 2513 + }, + { + "epoch": 1.242184603978747, + "grad_norm": 1.304980332058365, + "learning_rate": 6.886527278370131e-06, + "loss": 0.29440224170684814, + "step": 2514 + }, + { + "epoch": 1.2426788582725812, + "grad_norm": 1.1224782958445216, + "learning_rate": 6.878763835850475e-06, + "loss": 0.23107948899269104, + "step": 2515 + }, + { + "epoch": 1.2431731125664154, + "grad_norm": 1.55997556893969, + "learning_rate": 6.871002477023488e-06, + "loss": 0.2682652473449707, + "step": 2516 + }, + { + "epoch": 1.2436673668602496, + "grad_norm": 1.2329698948831815, + "learning_rate": 6.863243207070534e-06, + "loss": 0.2935982644557953, + "step": 2517 + }, + { + "epoch": 1.2441616211540838, + "grad_norm": 1.4373018605291157, + "learning_rate": 6.855486031171597e-06, + "loss": 0.29027625918388367, + "step": 2518 + }, + { + 
"epoch": 1.244655875447918, + "grad_norm": 1.2739101669235458, + "learning_rate": 6.84773095450526e-06, + "loss": 0.25107353925704956, + "step": 2519 + }, + { + "epoch": 1.2451501297417522, + "grad_norm": 1.2325888755211254, + "learning_rate": 6.839977982248697e-06, + "loss": 0.279231995344162, + "step": 2520 + }, + { + "epoch": 1.2456443840355864, + "grad_norm": 1.2006221660421637, + "learning_rate": 6.832227119577677e-06, + "loss": 0.2544802129268646, + "step": 2521 + }, + { + "epoch": 1.2461386383294204, + "grad_norm": 1.397981415575177, + "learning_rate": 6.824478371666573e-06, + "loss": 0.24365633726119995, + "step": 2522 + }, + { + "epoch": 1.2466328926232546, + "grad_norm": 1.1393524200353975, + "learning_rate": 6.816731743688336e-06, + "loss": 0.2673290967941284, + "step": 2523 + }, + { + "epoch": 1.2471271469170888, + "grad_norm": 1.284093438519867, + "learning_rate": 6.808987240814504e-06, + "loss": 0.23896455764770508, + "step": 2524 + }, + { + "epoch": 1.247621401210923, + "grad_norm": 1.200000168994301, + "learning_rate": 6.801244868215192e-06, + "loss": 0.23196406662464142, + "step": 2525 + }, + { + "epoch": 1.2481156555047572, + "grad_norm": 1.2289321548733863, + "learning_rate": 6.793504631059106e-06, + "loss": 0.24249708652496338, + "step": 2526 + }, + { + "epoch": 1.2486099097985914, + "grad_norm": 1.1511217069627229, + "learning_rate": 6.785766534513514e-06, + "loss": 0.2366780787706375, + "step": 2527 + }, + { + "epoch": 1.2491041640924256, + "grad_norm": 1.291146988373714, + "learning_rate": 6.778030583744254e-06, + "loss": 0.2615105211734772, + "step": 2528 + }, + { + "epoch": 1.2495984183862596, + "grad_norm": 1.4688230831159943, + "learning_rate": 6.770296783915738e-06, + "loss": 0.29761314392089844, + "step": 2529 + }, + { + "epoch": 1.2500926726800938, + "grad_norm": 1.2928438568936322, + "learning_rate": 6.762565140190948e-06, + "loss": 0.25020867586135864, + "step": 2530 + }, + { + "epoch": 1.250586926973928, + "grad_norm": 
1.3858962507108388, + "learning_rate": 6.754835657731409e-06, + "loss": 0.2716590166091919, + "step": 2531 + }, + { + "epoch": 1.2510811812677622, + "grad_norm": 1.4048062063243787, + "learning_rate": 6.747108341697221e-06, + "loss": 0.27042001485824585, + "step": 2532 + }, + { + "epoch": 1.2515754355615964, + "grad_norm": 1.3297085932201778, + "learning_rate": 6.739383197247023e-06, + "loss": 0.2659035325050354, + "step": 2533 + }, + { + "epoch": 1.2520696898554307, + "grad_norm": 1.3945414928963702, + "learning_rate": 6.731660229538014e-06, + "loss": 0.2803581655025482, + "step": 2534 + }, + { + "epoch": 1.2525639441492649, + "grad_norm": 1.1484885760506975, + "learning_rate": 6.723939443725938e-06, + "loss": 0.24422097206115723, + "step": 2535 + }, + { + "epoch": 1.253058198443099, + "grad_norm": 1.5676789145324774, + "learning_rate": 6.71622084496508e-06, + "loss": 0.30003631114959717, + "step": 2536 + }, + { + "epoch": 1.2535524527369333, + "grad_norm": 1.3207189074013763, + "learning_rate": 6.708504438408265e-06, + "loss": 0.25745317339897156, + "step": 2537 + }, + { + "epoch": 1.2540467070307673, + "grad_norm": 1.3298790802481242, + "learning_rate": 6.700790229206856e-06, + "loss": 0.27648618817329407, + "step": 2538 + }, + { + "epoch": 1.2545409613246015, + "grad_norm": 1.2910375745243117, + "learning_rate": 6.6930782225107536e-06, + "loss": 0.2579975724220276, + "step": 2539 + }, + { + "epoch": 1.2550352156184357, + "grad_norm": 1.3321333943034437, + "learning_rate": 6.68536842346838e-06, + "loss": 0.2806825637817383, + "step": 2540 + }, + { + "epoch": 1.2555294699122699, + "grad_norm": 1.5211080365897773, + "learning_rate": 6.677660837226685e-06, + "loss": 0.2641657888889313, + "step": 2541 + }, + { + "epoch": 1.256023724206104, + "grad_norm": 1.3170844434659201, + "learning_rate": 6.669955468931142e-06, + "loss": 0.25483542680740356, + "step": 2542 + }, + { + "epoch": 1.2565179784999383, + "grad_norm": 1.438596032878092, + "learning_rate": 
6.662252323725751e-06, + "loss": 0.264334112405777, + "step": 2543 + }, + { + "epoch": 1.2570122327937723, + "grad_norm": 1.2825942587632855, + "learning_rate": 6.654551406753017e-06, + "loss": 0.2541567385196686, + "step": 2544 + }, + { + "epoch": 1.2575064870876065, + "grad_norm": 1.3007868833040497, + "learning_rate": 6.646852723153965e-06, + "loss": 0.2695424258708954, + "step": 2545 + }, + { + "epoch": 1.2580007413814407, + "grad_norm": 1.2114763710946868, + "learning_rate": 6.63915627806812e-06, + "loss": 0.2694344222545624, + "step": 2546 + }, + { + "epoch": 1.258494995675275, + "grad_norm": 1.3203626104751756, + "learning_rate": 6.631462076633527e-06, + "loss": 0.2695961892604828, + "step": 2547 + }, + { + "epoch": 1.258989249969109, + "grad_norm": 1.43655166025842, + "learning_rate": 6.623770123986719e-06, + "loss": 0.26878753304481506, + "step": 2548 + }, + { + "epoch": 1.2594835042629433, + "grad_norm": 1.4117532208090406, + "learning_rate": 6.616080425262738e-06, + "loss": 0.27568501234054565, + "step": 2549 + }, + { + "epoch": 1.2599777585567775, + "grad_norm": 1.4407785281346286, + "learning_rate": 6.608392985595111e-06, + "loss": 0.2991989254951477, + "step": 2550 + }, + { + "epoch": 1.2604720128506117, + "grad_norm": 1.2938769852574108, + "learning_rate": 6.600707810115869e-06, + "loss": 0.21832239627838135, + "step": 2551 + }, + { + "epoch": 1.260966267144446, + "grad_norm": 1.3528768023288296, + "learning_rate": 6.593024903955525e-06, + "loss": 0.2671685516834259, + "step": 2552 + }, + { + "epoch": 1.26146052143828, + "grad_norm": 1.142061359022944, + "learning_rate": 6.585344272243073e-06, + "loss": 0.23399557173252106, + "step": 2553 + }, + { + "epoch": 1.2619547757321141, + "grad_norm": 1.3000899404630435, + "learning_rate": 6.577665920105996e-06, + "loss": 0.2701990008354187, + "step": 2554 + }, + { + "epoch": 1.2624490300259483, + "grad_norm": 1.216581780326655, + "learning_rate": 6.56998985267025e-06, + "loss": 0.2679189145565033, + "step": 
2555 + }, + { + "epoch": 1.2629432843197825, + "grad_norm": 1.3457541131318878, + "learning_rate": 6.562316075060272e-06, + "loss": 0.2597065567970276, + "step": 2556 + }, + { + "epoch": 1.2634375386136167, + "grad_norm": 1.3732680167208262, + "learning_rate": 6.554644592398962e-06, + "loss": 0.2942010462284088, + "step": 2557 + }, + { + "epoch": 1.263931792907451, + "grad_norm": 1.2654921757837638, + "learning_rate": 6.546975409807696e-06, + "loss": 0.2547098994255066, + "step": 2558 + }, + { + "epoch": 1.264426047201285, + "grad_norm": 1.29416806058113, + "learning_rate": 6.539308532406306e-06, + "loss": 0.2779114246368408, + "step": 2559 + }, + { + "epoch": 1.2649203014951191, + "grad_norm": 1.2525651200835928, + "learning_rate": 6.531643965313093e-06, + "loss": 0.22318917512893677, + "step": 2560 + }, + { + "epoch": 1.2654145557889533, + "grad_norm": 1.2931765026229116, + "learning_rate": 6.523981713644814e-06, + "loss": 0.25439128279685974, + "step": 2561 + }, + { + "epoch": 1.2659088100827876, + "grad_norm": 1.1946536852540512, + "learning_rate": 6.516321782516677e-06, + "loss": 0.2317974865436554, + "step": 2562 + }, + { + "epoch": 1.2664030643766218, + "grad_norm": 1.3517228291780166, + "learning_rate": 6.508664177042339e-06, + "loss": 0.273223876953125, + "step": 2563 + }, + { + "epoch": 1.266897318670456, + "grad_norm": 1.3767500694886763, + "learning_rate": 6.501008902333912e-06, + "loss": 0.28408509492874146, + "step": 2564 + }, + { + "epoch": 1.2673915729642902, + "grad_norm": 1.4378995512233899, + "learning_rate": 6.493355963501951e-06, + "loss": 0.2702238857746124, + "step": 2565 + }, + { + "epoch": 1.2678858272581244, + "grad_norm": 1.2819637354130675, + "learning_rate": 6.485705365655441e-06, + "loss": 0.2142164558172226, + "step": 2566 + }, + { + "epoch": 1.2683800815519586, + "grad_norm": 1.4108385899794438, + "learning_rate": 6.478057113901817e-06, + "loss": 0.2654300928115845, + "step": 2567 + }, + { + "epoch": 1.2688743358457928, + 
"grad_norm": 1.1724627648861543, + "learning_rate": 6.470411213346941e-06, + "loss": 0.24601367115974426, + "step": 2568 + }, + { + "epoch": 1.2693685901396268, + "grad_norm": 1.36613316910106, + "learning_rate": 6.462767669095109e-06, + "loss": 0.26201942563056946, + "step": 2569 + }, + { + "epoch": 1.269862844433461, + "grad_norm": 1.342399065083916, + "learning_rate": 6.455126486249038e-06, + "loss": 0.2839587926864624, + "step": 2570 + }, + { + "epoch": 1.2703570987272952, + "grad_norm": 1.2538564056049797, + "learning_rate": 6.447487669909873e-06, + "loss": 0.21100708842277527, + "step": 2571 + }, + { + "epoch": 1.2708513530211294, + "grad_norm": 1.1457223195177177, + "learning_rate": 6.439851225177185e-06, + "loss": 0.2181582748889923, + "step": 2572 + }, + { + "epoch": 1.2713456073149636, + "grad_norm": 1.397761306307691, + "learning_rate": 6.432217157148948e-06, + "loss": 0.29196488857269287, + "step": 2573 + }, + { + "epoch": 1.2718398616087978, + "grad_norm": 1.3664440708479575, + "learning_rate": 6.424585470921563e-06, + "loss": 0.2365931123495102, + "step": 2574 + }, + { + "epoch": 1.2723341159026318, + "grad_norm": 1.3496940412150429, + "learning_rate": 6.4169561715898255e-06, + "loss": 0.2277393937110901, + "step": 2575 + }, + { + "epoch": 1.272828370196466, + "grad_norm": 1.3624051718280268, + "learning_rate": 6.409329264246956e-06, + "loss": 0.25285032391548157, + "step": 2576 + }, + { + "epoch": 1.2733226244903002, + "grad_norm": 1.2632390853508073, + "learning_rate": 6.401704753984563e-06, + "loss": 0.253650963306427, + "step": 2577 + }, + { + "epoch": 1.2738168787841344, + "grad_norm": 1.264245223392645, + "learning_rate": 6.394082645892668e-06, + "loss": 0.22143784165382385, + "step": 2578 + }, + { + "epoch": 1.2743111330779686, + "grad_norm": 1.3283739907286298, + "learning_rate": 6.3864629450596696e-06, + "loss": 0.27591395378112793, + "step": 2579 + }, + { + "epoch": 1.2748053873718028, + "grad_norm": 1.6236594986793635, + "learning_rate": 
6.37884565657238e-06, + "loss": 0.32865333557128906, + "step": 2580 + }, + { + "epoch": 1.275299641665637, + "grad_norm": 1.2172019661301716, + "learning_rate": 6.371230785515992e-06, + "loss": 0.2743702530860901, + "step": 2581 + }, + { + "epoch": 1.2757938959594712, + "grad_norm": 1.2586352823219396, + "learning_rate": 6.3636183369740845e-06, + "loss": 0.23967956006526947, + "step": 2582 + }, + { + "epoch": 1.2762881502533054, + "grad_norm": 1.206746025741565, + "learning_rate": 6.356008316028614e-06, + "loss": 0.2474803626537323, + "step": 2583 + }, + { + "epoch": 1.2767824045471394, + "grad_norm": 1.2591134604976273, + "learning_rate": 6.348400727759925e-06, + "loss": 0.2523267865180969, + "step": 2584 + }, + { + "epoch": 1.2772766588409736, + "grad_norm": 1.3690385191668641, + "learning_rate": 6.340795577246738e-06, + "loss": 0.2549436092376709, + "step": 2585 + }, + { + "epoch": 1.2777709131348078, + "grad_norm": 1.309885921175695, + "learning_rate": 6.333192869566138e-06, + "loss": 0.2602443993091583, + "step": 2586 + }, + { + "epoch": 1.278265167428642, + "grad_norm": 1.248955873440961, + "learning_rate": 6.325592609793588e-06, + "loss": 0.22912462055683136, + "step": 2587 + }, + { + "epoch": 1.2787594217224763, + "grad_norm": 1.3253843576578603, + "learning_rate": 6.317994803002907e-06, + "loss": 0.3004158139228821, + "step": 2588 + }, + { + "epoch": 1.2792536760163105, + "grad_norm": 1.2054603629919527, + "learning_rate": 6.310399454266289e-06, + "loss": 0.25851407647132874, + "step": 2589 + }, + { + "epoch": 1.2797479303101444, + "grad_norm": 1.2857681683589963, + "learning_rate": 6.302806568654277e-06, + "loss": 0.24637526273727417, + "step": 2590 + }, + { + "epoch": 1.2802421846039787, + "grad_norm": 1.2976312908550238, + "learning_rate": 6.295216151235774e-06, + "loss": 0.26500213146209717, + "step": 2591 + }, + { + "epoch": 1.2807364388978129, + "grad_norm": 1.2103490895138174, + "learning_rate": 6.287628207078031e-06, + "loss": 0.24276241660118103, 
+ "step": 2592 + }, + { + "epoch": 1.281230693191647, + "grad_norm": 2.3839558822188787, + "learning_rate": 6.280042741246655e-06, + "loss": 0.27117204666137695, + "step": 2593 + }, + { + "epoch": 1.2817249474854813, + "grad_norm": 1.4461368742366545, + "learning_rate": 6.272459758805596e-06, + "loss": 0.29287856817245483, + "step": 2594 + }, + { + "epoch": 1.2822192017793155, + "grad_norm": 1.4301387064569637, + "learning_rate": 6.26487926481714e-06, + "loss": 0.3065788149833679, + "step": 2595 + }, + { + "epoch": 1.2827134560731497, + "grad_norm": 1.3198078410588965, + "learning_rate": 6.257301264341915e-06, + "loss": 0.2738455533981323, + "step": 2596 + }, + { + "epoch": 1.283207710366984, + "grad_norm": 1.5398007848288653, + "learning_rate": 6.2497257624388915e-06, + "loss": 0.24216318130493164, + "step": 2597 + }, + { + "epoch": 1.283701964660818, + "grad_norm": 1.2565420891983292, + "learning_rate": 6.242152764165368e-06, + "loss": 0.276785671710968, + "step": 2598 + }, + { + "epoch": 1.2841962189546523, + "grad_norm": 1.2307015932000853, + "learning_rate": 6.234582274576961e-06, + "loss": 0.24999365210533142, + "step": 2599 + }, + { + "epoch": 1.2846904732484863, + "grad_norm": 1.2824145770644522, + "learning_rate": 6.227014298727627e-06, + "loss": 0.27714112401008606, + "step": 2600 + }, + { + "epoch": 1.2851847275423205, + "grad_norm": 1.2260344372038856, + "learning_rate": 6.219448841669639e-06, + "loss": 0.2422318160533905, + "step": 2601 + }, + { + "epoch": 1.2856789818361547, + "grad_norm": 1.3255802725159413, + "learning_rate": 6.21188590845359e-06, + "loss": 0.26688697934150696, + "step": 2602 + }, + { + "epoch": 1.286173236129989, + "grad_norm": 1.2753676961687272, + "learning_rate": 6.204325504128379e-06, + "loss": 0.256889671087265, + "step": 2603 + }, + { + "epoch": 1.2866674904238231, + "grad_norm": 1.3013140965176258, + "learning_rate": 6.196767633741225e-06, + "loss": 0.27372461557388306, + "step": 2604 + }, + { + "epoch": 1.287161744717657, + 
"grad_norm": 1.3064762941978003, + "learning_rate": 6.189212302337663e-06, + "loss": 0.25194403529167175, + "step": 2605 + }, + { + "epoch": 1.2876559990114913, + "grad_norm": 1.2533511197404907, + "learning_rate": 6.181659514961515e-06, + "loss": 0.24381688237190247, + "step": 2606 + }, + { + "epoch": 1.2881502533053255, + "grad_norm": 1.2987400887924563, + "learning_rate": 6.17410927665492e-06, + "loss": 0.255805104970932, + "step": 2607 + }, + { + "epoch": 1.2886445075991597, + "grad_norm": 1.270289405479379, + "learning_rate": 6.166561592458307e-06, + "loss": 0.25070682168006897, + "step": 2608 + }, + { + "epoch": 1.289138761892994, + "grad_norm": 1.1954868388063873, + "learning_rate": 6.159016467410397e-06, + "loss": 0.24080060422420502, + "step": 2609 + }, + { + "epoch": 1.2896330161868281, + "grad_norm": 1.3524298235557053, + "learning_rate": 6.151473906548215e-06, + "loss": 0.28041762113571167, + "step": 2610 + }, + { + "epoch": 1.2901272704806623, + "grad_norm": 1.3891353799265191, + "learning_rate": 6.143933914907065e-06, + "loss": 0.2624273896217346, + "step": 2611 + }, + { + "epoch": 1.2906215247744965, + "grad_norm": 1.3838932352032651, + "learning_rate": 6.136396497520536e-06, + "loss": 0.2658112049102783, + "step": 2612 + }, + { + "epoch": 1.2911157790683307, + "grad_norm": 1.3103712430992434, + "learning_rate": 6.1288616594205e-06, + "loss": 0.27714237570762634, + "step": 2613 + }, + { + "epoch": 1.291610033362165, + "grad_norm": 1.2276105048536776, + "learning_rate": 6.121329405637111e-06, + "loss": 0.23253153264522552, + "step": 2614 + }, + { + "epoch": 1.292104287655999, + "grad_norm": 1.2168125400378236, + "learning_rate": 6.1137997411987915e-06, + "loss": 0.2438409924507141, + "step": 2615 + }, + { + "epoch": 1.2925985419498331, + "grad_norm": 1.3814066274151728, + "learning_rate": 6.106272671132236e-06, + "loss": 0.24013856053352356, + "step": 2616 + }, + { + "epoch": 1.2930927962436674, + "grad_norm": 1.4362282063831207, + "learning_rate": 
6.098748200462408e-06, + "loss": 0.2850446403026581, + "step": 2617 + }, + { + "epoch": 1.2935870505375016, + "grad_norm": 1.3403873033762816, + "learning_rate": 6.0912263342125445e-06, + "loss": 0.22195187211036682, + "step": 2618 + }, + { + "epoch": 1.2940813048313358, + "grad_norm": 1.3701004376420556, + "learning_rate": 6.083707077404129e-06, + "loss": 0.29266390204429626, + "step": 2619 + }, + { + "epoch": 1.29457555912517, + "grad_norm": 1.2103981171479565, + "learning_rate": 6.076190435056913e-06, + "loss": 0.26741352677345276, + "step": 2620 + }, + { + "epoch": 1.295069813419004, + "grad_norm": 1.259544042020202, + "learning_rate": 6.068676412188892e-06, + "loss": 0.26014602184295654, + "step": 2621 + }, + { + "epoch": 1.2955640677128382, + "grad_norm": 1.2871395012144142, + "learning_rate": 6.061165013816333e-06, + "loss": 0.2561393976211548, + "step": 2622 + }, + { + "epoch": 1.2960583220066724, + "grad_norm": 1.312678751233067, + "learning_rate": 6.053656244953728e-06, + "loss": 0.2952851951122284, + "step": 2623 + }, + { + "epoch": 1.2965525763005066, + "grad_norm": 1.2817239432203538, + "learning_rate": 6.046150110613831e-06, + "loss": 0.2830423414707184, + "step": 2624 + }, + { + "epoch": 1.2970468305943408, + "grad_norm": 1.2514529269380406, + "learning_rate": 6.038646615807622e-06, + "loss": 0.22306497395038605, + "step": 2625 + }, + { + "epoch": 1.297541084888175, + "grad_norm": 1.3018072981213034, + "learning_rate": 6.031145765544333e-06, + "loss": 0.23291784524917603, + "step": 2626 + }, + { + "epoch": 1.2980353391820092, + "grad_norm": 1.3763927806121403, + "learning_rate": 6.023647564831425e-06, + "loss": 0.2376563400030136, + "step": 2627 + }, + { + "epoch": 1.2985295934758434, + "grad_norm": 1.3283544756021872, + "learning_rate": 6.016152018674588e-06, + "loss": 0.2873516380786896, + "step": 2628 + }, + { + "epoch": 1.2990238477696776, + "grad_norm": 1.2475849952661122, + "learning_rate": 6.00865913207774e-06, + "loss": 0.2416999638080597, + 
"step": 2629 + }, + { + "epoch": 1.2995181020635118, + "grad_norm": 1.2254304075146119, + "learning_rate": 6.001168910043023e-06, + "loss": 0.2627726197242737, + "step": 2630 + }, + { + "epoch": 1.3000123563573458, + "grad_norm": 1.4025542210635493, + "learning_rate": 5.993681357570809e-06, + "loss": 0.25375279784202576, + "step": 2631 + }, + { + "epoch": 1.30050661065118, + "grad_norm": 1.3348797401747288, + "learning_rate": 5.986196479659676e-06, + "loss": 0.2853030562400818, + "step": 2632 + }, + { + "epoch": 1.3010008649450142, + "grad_norm": 1.3089867713489467, + "learning_rate": 5.978714281306425e-06, + "loss": 0.2626519501209259, + "step": 2633 + }, + { + "epoch": 1.3014951192388484, + "grad_norm": 1.4566011034207051, + "learning_rate": 5.971234767506057e-06, + "loss": 0.2895713448524475, + "step": 2634 + }, + { + "epoch": 1.3019893735326826, + "grad_norm": 1.2504104998957544, + "learning_rate": 5.9637579432518e-06, + "loss": 0.24617832899093628, + "step": 2635 + }, + { + "epoch": 1.3024836278265166, + "grad_norm": 1.2199824881911456, + "learning_rate": 5.956283813535066e-06, + "loss": 0.25497785210609436, + "step": 2636 + }, + { + "epoch": 1.3029778821203508, + "grad_norm": 1.3200409304272294, + "learning_rate": 5.948812383345484e-06, + "loss": 0.25832462310791016, + "step": 2637 + }, + { + "epoch": 1.303472136414185, + "grad_norm": 1.214232538768618, + "learning_rate": 5.941343657670866e-06, + "loss": 0.24273909628391266, + "step": 2638 + }, + { + "epoch": 1.3039663907080192, + "grad_norm": 1.2844572342866962, + "learning_rate": 5.933877641497232e-06, + "loss": 0.2668009400367737, + "step": 2639 + }, + { + "epoch": 1.3044606450018534, + "grad_norm": 1.2388896928667246, + "learning_rate": 5.92641433980879e-06, + "loss": 0.2519373595714569, + "step": 2640 + }, + { + "epoch": 1.3049548992956876, + "grad_norm": 1.3760811135868023, + "learning_rate": 5.918953757587928e-06, + "loss": 0.30091768503189087, + "step": 2641 + }, + { + "epoch": 1.3054491535895218, + 
"grad_norm": 1.207587317973019, + "learning_rate": 5.911495899815225e-06, + "loss": 0.2504241466522217, + "step": 2642 + }, + { + "epoch": 1.305943407883356, + "grad_norm": 1.1902656490822856, + "learning_rate": 5.904040771469444e-06, + "loss": 0.24741190671920776, + "step": 2643 + }, + { + "epoch": 1.3064376621771903, + "grad_norm": 1.2559463008488698, + "learning_rate": 5.896588377527519e-06, + "loss": 0.2636350691318512, + "step": 2644 + }, + { + "epoch": 1.3069319164710245, + "grad_norm": 1.1981836589630794, + "learning_rate": 5.889138722964563e-06, + "loss": 0.22512421011924744, + "step": 2645 + }, + { + "epoch": 1.3074261707648585, + "grad_norm": 1.2451009493990417, + "learning_rate": 5.8816918127538546e-06, + "loss": 0.26447975635528564, + "step": 2646 + }, + { + "epoch": 1.3079204250586927, + "grad_norm": 1.1839899877527418, + "learning_rate": 5.874247651866853e-06, + "loss": 0.22084996104240417, + "step": 2647 + }, + { + "epoch": 1.3084146793525269, + "grad_norm": 1.2774730254159221, + "learning_rate": 5.8668062452731715e-06, + "loss": 0.24033552408218384, + "step": 2648 + }, + { + "epoch": 1.308908933646361, + "grad_norm": 1.305578072115893, + "learning_rate": 5.8593675979405795e-06, + "loss": 0.24829509854316711, + "step": 2649 + }, + { + "epoch": 1.3094031879401953, + "grad_norm": 1.3651670637998603, + "learning_rate": 5.851931714835016e-06, + "loss": 0.29011303186416626, + "step": 2650 + }, + { + "epoch": 1.3098974422340295, + "grad_norm": 1.3474001783390817, + "learning_rate": 5.8444986009205754e-06, + "loss": 0.273196280002594, + "step": 2651 + }, + { + "epoch": 1.3103916965278635, + "grad_norm": 1.4905929158728624, + "learning_rate": 5.837068261159491e-06, + "loss": 0.28843480348587036, + "step": 2652 + }, + { + "epoch": 1.3108859508216977, + "grad_norm": 1.297768951304802, + "learning_rate": 5.829640700512159e-06, + "loss": 0.25919461250305176, + "step": 2653 + }, + { + "epoch": 1.3113802051155319, + "grad_norm": 1.4662917967499176, + 
"learning_rate": 5.822215923937105e-06, + "loss": 0.24588480591773987, + "step": 2654 + }, + { + "epoch": 1.311874459409366, + "grad_norm": 1.4533199098003418, + "learning_rate": 5.814793936391001e-06, + "loss": 0.26138943433761597, + "step": 2655 + }, + { + "epoch": 1.3123687137032003, + "grad_norm": 1.3892499103405112, + "learning_rate": 5.807374742828675e-06, + "loss": 0.2740943729877472, + "step": 2656 + }, + { + "epoch": 1.3128629679970345, + "grad_norm": 1.2689667055719156, + "learning_rate": 5.7999583482030605e-06, + "loss": 0.2307349294424057, + "step": 2657 + }, + { + "epoch": 1.3133572222908687, + "grad_norm": 1.4143938245126972, + "learning_rate": 5.792544757465242e-06, + "loss": 0.28424161672592163, + "step": 2658 + }, + { + "epoch": 1.313851476584703, + "grad_norm": 1.1640800807114133, + "learning_rate": 5.785133975564426e-06, + "loss": 0.2586106061935425, + "step": 2659 + }, + { + "epoch": 1.3143457308785371, + "grad_norm": 1.3677717868907802, + "learning_rate": 5.7777260074479455e-06, + "loss": 0.23268333077430725, + "step": 2660 + }, + { + "epoch": 1.314839985172371, + "grad_norm": 1.3455357811423037, + "learning_rate": 5.770320858061254e-06, + "loss": 0.22144779562950134, + "step": 2661 + }, + { + "epoch": 1.3153342394662053, + "grad_norm": 1.1539678539958322, + "learning_rate": 5.762918532347925e-06, + "loss": 0.2450334131717682, + "step": 2662 + }, + { + "epoch": 1.3158284937600395, + "grad_norm": 1.3018328480210146, + "learning_rate": 5.7555190352496375e-06, + "loss": 0.24483400583267212, + "step": 2663 + }, + { + "epoch": 1.3163227480538737, + "grad_norm": 1.484825672376601, + "learning_rate": 5.748122371706198e-06, + "loss": 0.2590720057487488, + "step": 2664 + }, + { + "epoch": 1.316817002347708, + "grad_norm": 1.5800261617865896, + "learning_rate": 5.740728546655515e-06, + "loss": 0.27116847038269043, + "step": 2665 + }, + { + "epoch": 1.3173112566415421, + "grad_norm": 1.3133697295364004, + "learning_rate": 5.733337565033595e-06, + "loss": 
0.2720273435115814, + "step": 2666 + }, + { + "epoch": 1.3178055109353761, + "grad_norm": 1.223977156924706, + "learning_rate": 5.7259494317745514e-06, + "loss": 0.22150173783302307, + "step": 2667 + }, + { + "epoch": 1.3182997652292103, + "grad_norm": 1.2771960781536442, + "learning_rate": 5.718564151810597e-06, + "loss": 0.27474984526634216, + "step": 2668 + }, + { + "epoch": 1.3187940195230445, + "grad_norm": 1.3886425317966573, + "learning_rate": 5.711181730072044e-06, + "loss": 0.2547265291213989, + "step": 2669 + }, + { + "epoch": 1.3192882738168787, + "grad_norm": 1.3822455459704068, + "learning_rate": 5.703802171487286e-06, + "loss": 0.2686036229133606, + "step": 2670 + }, + { + "epoch": 1.319782528110713, + "grad_norm": 1.2588694556349689, + "learning_rate": 5.696425480982814e-06, + "loss": 0.2276458591222763, + "step": 2671 + }, + { + "epoch": 1.3202767824045472, + "grad_norm": 1.3366284026803796, + "learning_rate": 5.6890516634832e-06, + "loss": 0.25005075335502625, + "step": 2672 + }, + { + "epoch": 1.3207710366983814, + "grad_norm": 1.4092724528348008, + "learning_rate": 5.681680723911104e-06, + "loss": 0.25919869542121887, + "step": 2673 + }, + { + "epoch": 1.3212652909922156, + "grad_norm": 1.3254224112633677, + "learning_rate": 5.6743126671872505e-06, + "loss": 0.2684757709503174, + "step": 2674 + }, + { + "epoch": 1.3217595452860498, + "grad_norm": 1.2529305606098464, + "learning_rate": 5.666947498230451e-06, + "loss": 0.2554991543292999, + "step": 2675 + }, + { + "epoch": 1.322253799579884, + "grad_norm": 1.3734571061597927, + "learning_rate": 5.6595852219575975e-06, + "loss": 0.27026665210723877, + "step": 2676 + }, + { + "epoch": 1.322748053873718, + "grad_norm": 1.5029882994051502, + "learning_rate": 5.652225843283629e-06, + "loss": 0.3248092234134674, + "step": 2677 + }, + { + "epoch": 1.3232423081675522, + "grad_norm": 1.3299866785479277, + "learning_rate": 5.644869367121564e-06, + "loss": 0.2554503083229065, + "step": 2678 + }, + { + 
"epoch": 1.3237365624613864, + "grad_norm": 1.4099986774485116, + "learning_rate": 5.637515798382488e-06, + "loss": 0.25482693314552307, + "step": 2679 + }, + { + "epoch": 1.3242308167552206, + "grad_norm": 1.268292092612611, + "learning_rate": 5.630165141975523e-06, + "loss": 0.24664446711540222, + "step": 2680 + }, + { + "epoch": 1.3247250710490548, + "grad_norm": 1.3674712589344702, + "learning_rate": 5.622817402807879e-06, + "loss": 0.23855865001678467, + "step": 2681 + }, + { + "epoch": 1.325219325342889, + "grad_norm": 1.28659959156705, + "learning_rate": 5.615472585784796e-06, + "loss": 0.2847699820995331, + "step": 2682 + }, + { + "epoch": 1.325713579636723, + "grad_norm": 1.3902791844570088, + "learning_rate": 5.608130695809564e-06, + "loss": 0.2705647051334381, + "step": 2683 + }, + { + "epoch": 1.3262078339305572, + "grad_norm": 1.3726972299660716, + "learning_rate": 5.600791737783523e-06, + "loss": 0.30135318636894226, + "step": 2684 + }, + { + "epoch": 1.3267020882243914, + "grad_norm": 1.3006770767718296, + "learning_rate": 5.593455716606069e-06, + "loss": 0.261536180973053, + "step": 2685 + }, + { + "epoch": 1.3271963425182256, + "grad_norm": 1.2249107195075626, + "learning_rate": 5.586122637174614e-06, + "loss": 0.24006187915802002, + "step": 2686 + }, + { + "epoch": 1.3276905968120598, + "grad_norm": 1.2887498899635654, + "learning_rate": 5.578792504384618e-06, + "loss": 0.27928346395492554, + "step": 2687 + }, + { + "epoch": 1.328184851105894, + "grad_norm": 1.1715759673643904, + "learning_rate": 5.5714653231295745e-06, + "loss": 0.24134980142116547, + "step": 2688 + }, + { + "epoch": 1.3286791053997282, + "grad_norm": 1.2633540397916776, + "learning_rate": 5.5641410983010055e-06, + "loss": 0.27914801239967346, + "step": 2689 + }, + { + "epoch": 1.3291733596935624, + "grad_norm": 1.6348254119913803, + "learning_rate": 5.55681983478846e-06, + "loss": 0.2735476493835449, + "step": 2690 + }, + { + "epoch": 1.3296676139873966, + "grad_norm": 
1.3384777828423575, + "learning_rate": 5.549501537479511e-06, + "loss": 0.24919739365577698, + "step": 2691 + }, + { + "epoch": 1.3301618682812306, + "grad_norm": 1.430948519009228, + "learning_rate": 5.542186211259737e-06, + "loss": 0.25435787439346313, + "step": 2692 + }, + { + "epoch": 1.3306561225750648, + "grad_norm": 1.2533415908145504, + "learning_rate": 5.534873861012763e-06, + "loss": 0.2502862811088562, + "step": 2693 + }, + { + "epoch": 1.331150376868899, + "grad_norm": 1.5771700033159861, + "learning_rate": 5.527564491620195e-06, + "loss": 0.25752580165863037, + "step": 2694 + }, + { + "epoch": 1.3316446311627332, + "grad_norm": 1.332305251527839, + "learning_rate": 5.520258107961671e-06, + "loss": 0.22301846742630005, + "step": 2695 + }, + { + "epoch": 1.3321388854565674, + "grad_norm": 1.4890781870784164, + "learning_rate": 5.512954714914825e-06, + "loss": 0.24581964313983917, + "step": 2696 + }, + { + "epoch": 1.3326331397504017, + "grad_norm": 1.3113609641171107, + "learning_rate": 5.5056543173553e-06, + "loss": 0.271970272064209, + "step": 2697 + }, + { + "epoch": 1.3331273940442356, + "grad_norm": 1.2432947451070444, + "learning_rate": 5.498356920156735e-06, + "loss": 0.23041053116321564, + "step": 2698 + }, + { + "epoch": 1.3336216483380698, + "grad_norm": 1.3239879393507852, + "learning_rate": 5.491062528190775e-06, + "loss": 0.2338491678237915, + "step": 2699 + }, + { + "epoch": 1.334115902631904, + "grad_norm": 1.3971989589857847, + "learning_rate": 5.483771146327037e-06, + "loss": 0.2667239010334015, + "step": 2700 + }, + { + "epoch": 1.3346101569257383, + "grad_norm": 1.1737606299055239, + "learning_rate": 5.4764827794331586e-06, + "loss": 0.24761441349983215, + "step": 2701 + }, + { + "epoch": 1.3351044112195725, + "grad_norm": 1.2384835240862428, + "learning_rate": 5.469197432374747e-06, + "loss": 0.24087639153003693, + "step": 2702 + }, + { + "epoch": 1.3355986655134067, + "grad_norm": 1.3287799587341789, + "learning_rate": 
5.461915110015386e-06, + "loss": 0.26774898171424866, + "step": 2703 + }, + { + "epoch": 1.3360929198072409, + "grad_norm": 1.432719946516567, + "learning_rate": 5.454635817216658e-06, + "loss": 0.2820417284965515, + "step": 2704 + }, + { + "epoch": 1.336587174101075, + "grad_norm": 1.408646831955897, + "learning_rate": 5.447359558838113e-06, + "loss": 0.2891086935997009, + "step": 2705 + }, + { + "epoch": 1.3370814283949093, + "grad_norm": 1.370327694474157, + "learning_rate": 5.440086339737277e-06, + "loss": 0.24551361799240112, + "step": 2706 + }, + { + "epoch": 1.3375756826887435, + "grad_norm": 1.3889596017030068, + "learning_rate": 5.432816164769648e-06, + "loss": 0.2293522208929062, + "step": 2707 + }, + { + "epoch": 1.3380699369825775, + "grad_norm": 1.255610549812546, + "learning_rate": 5.425549038788693e-06, + "loss": 0.22325105965137482, + "step": 2708 + }, + { + "epoch": 1.3385641912764117, + "grad_norm": 1.3152207031427636, + "learning_rate": 5.4182849666458315e-06, + "loss": 0.2263861447572708, + "step": 2709 + }, + { + "epoch": 1.339058445570246, + "grad_norm": 1.2663328789435477, + "learning_rate": 5.411023953190466e-06, + "loss": 0.26902303099632263, + "step": 2710 + }, + { + "epoch": 1.33955269986408, + "grad_norm": 1.4136099878472004, + "learning_rate": 5.403766003269944e-06, + "loss": 0.26154825091362, + "step": 2711 + }, + { + "epoch": 1.3400469541579143, + "grad_norm": 1.32960722740892, + "learning_rate": 5.396511121729562e-06, + "loss": 0.2878270745277405, + "step": 2712 + }, + { + "epoch": 1.3405412084517483, + "grad_norm": 1.3136699200223048, + "learning_rate": 5.389259313412581e-06, + "loss": 0.26206687092781067, + "step": 2713 + }, + { + "epoch": 1.3410354627455825, + "grad_norm": 1.4998302342686003, + "learning_rate": 5.382010583160201e-06, + "loss": 0.25612518191337585, + "step": 2714 + }, + { + "epoch": 1.3415297170394167, + "grad_norm": 1.2688327982594605, + "learning_rate": 5.374764935811574e-06, + "loss": 0.25600868463516235, + 
"step": 2715 + }, + { + "epoch": 1.342023971333251, + "grad_norm": 1.274882827976935, + "learning_rate": 5.367522376203787e-06, + "loss": 0.24837616086006165, + "step": 2716 + }, + { + "epoch": 1.3425182256270851, + "grad_norm": 1.2814047275641038, + "learning_rate": 5.360282909171875e-06, + "loss": 0.23487885296344757, + "step": 2717 + }, + { + "epoch": 1.3430124799209193, + "grad_norm": 1.2024219184737237, + "learning_rate": 5.353046539548797e-06, + "loss": 0.22786842286586761, + "step": 2718 + }, + { + "epoch": 1.3435067342147535, + "grad_norm": 1.288373437821988, + "learning_rate": 5.3458132721654564e-06, + "loss": 0.2198137640953064, + "step": 2719 + }, + { + "epoch": 1.3440009885085877, + "grad_norm": 1.157338464361865, + "learning_rate": 5.338583111850671e-06, + "loss": 0.20056495070457458, + "step": 2720 + }, + { + "epoch": 1.344495242802422, + "grad_norm": 1.2341328448147324, + "learning_rate": 5.331356063431195e-06, + "loss": 0.21636295318603516, + "step": 2721 + }, + { + "epoch": 1.3449894970962561, + "grad_norm": 1.2390666617057948, + "learning_rate": 5.32413213173171e-06, + "loss": 0.23933230340480804, + "step": 2722 + }, + { + "epoch": 1.3454837513900901, + "grad_norm": 1.3024836233276083, + "learning_rate": 5.316911321574799e-06, + "loss": 0.2402106523513794, + "step": 2723 + }, + { + "epoch": 1.3459780056839243, + "grad_norm": 1.252933113923405, + "learning_rate": 5.309693637780979e-06, + "loss": 0.22524669766426086, + "step": 2724 + }, + { + "epoch": 1.3464722599777585, + "grad_norm": 1.3140972939485838, + "learning_rate": 5.302479085168668e-06, + "loss": 0.25381600856781006, + "step": 2725 + }, + { + "epoch": 1.3469665142715928, + "grad_norm": 1.2857997911307526, + "learning_rate": 5.295267668554202e-06, + "loss": 0.2614738643169403, + "step": 2726 + }, + { + "epoch": 1.347460768565427, + "grad_norm": 8.575818718402259, + "learning_rate": 5.288059392751817e-06, + "loss": 0.2701472043991089, + "step": 2727 + }, + { + "epoch": 1.3479550228592612, + 
"grad_norm": 1.378318405059408, + "learning_rate": 5.280854262573661e-06, + "loss": 0.2788996696472168, + "step": 2728 + }, + { + "epoch": 1.3484492771530951, + "grad_norm": 1.2759693341337726, + "learning_rate": 5.273652282829764e-06, + "loss": 0.2419927418231964, + "step": 2729 + }, + { + "epoch": 1.3489435314469294, + "grad_norm": 1.4943656047554885, + "learning_rate": 5.266453458328071e-06, + "loss": 0.26454097032546997, + "step": 2730 + }, + { + "epoch": 1.3494377857407636, + "grad_norm": 1.3109211241308218, + "learning_rate": 5.259257793874421e-06, + "loss": 0.24090510606765747, + "step": 2731 + }, + { + "epoch": 1.3499320400345978, + "grad_norm": 1.3390086912520884, + "learning_rate": 5.252065294272528e-06, + "loss": 0.27343428134918213, + "step": 2732 + }, + { + "epoch": 1.350426294328432, + "grad_norm": 1.3272957509132868, + "learning_rate": 5.244875964324005e-06, + "loss": 0.2623448967933655, + "step": 2733 + }, + { + "epoch": 1.3509205486222662, + "grad_norm": 1.2273005978142049, + "learning_rate": 5.237689808828346e-06, + "loss": 0.22721052169799805, + "step": 2734 + }, + { + "epoch": 1.3514148029161004, + "grad_norm": 1.4111267721919942, + "learning_rate": 5.230506832582924e-06, + "loss": 0.26385387778282166, + "step": 2735 + }, + { + "epoch": 1.3519090572099346, + "grad_norm": 1.4309565613654673, + "learning_rate": 5.223327040382995e-06, + "loss": 0.2679533064365387, + "step": 2736 + }, + { + "epoch": 1.3524033115037688, + "grad_norm": 1.285385576934023, + "learning_rate": 5.2161504370216855e-06, + "loss": 0.25042447447776794, + "step": 2737 + }, + { + "epoch": 1.3528975657976028, + "grad_norm": 1.3420398780717075, + "learning_rate": 5.2089770272899845e-06, + "loss": 0.22735297679901123, + "step": 2738 + }, + { + "epoch": 1.353391820091437, + "grad_norm": 1.2715261749804811, + "learning_rate": 5.201806815976772e-06, + "loss": 0.25517284870147705, + "step": 2739 + }, + { + "epoch": 1.3538860743852712, + "grad_norm": 1.4834789867138143, + 
"learning_rate": 5.194639807868767e-06, + "loss": 0.2942652702331543, + "step": 2740 + }, + { + "epoch": 1.3543803286791054, + "grad_norm": 1.2535180106339032, + "learning_rate": 5.187476007750567e-06, + "loss": 0.2605661153793335, + "step": 2741 + }, + { + "epoch": 1.3548745829729396, + "grad_norm": 1.34702814682356, + "learning_rate": 5.1803154204046215e-06, + "loss": 0.22976648807525635, + "step": 2742 + }, + { + "epoch": 1.3553688372667738, + "grad_norm": 1.2786328684416228, + "learning_rate": 5.173158050611236e-06, + "loss": 0.24301470816135406, + "step": 2743 + }, + { + "epoch": 1.3558630915606078, + "grad_norm": 1.3509518199555386, + "learning_rate": 5.166003903148568e-06, + "loss": 0.2714199125766754, + "step": 2744 + }, + { + "epoch": 1.356357345854442, + "grad_norm": 1.4130809131188478, + "learning_rate": 5.15885298279263e-06, + "loss": 0.27004045248031616, + "step": 2745 + }, + { + "epoch": 1.3568516001482762, + "grad_norm": 1.1866112739948385, + "learning_rate": 5.151705294317262e-06, + "loss": 0.2062053680419922, + "step": 2746 + }, + { + "epoch": 1.3573458544421104, + "grad_norm": 1.3476275860643891, + "learning_rate": 5.144560842494168e-06, + "loss": 0.2589803636074066, + "step": 2747 + }, + { + "epoch": 1.3578401087359446, + "grad_norm": 1.4207662826517113, + "learning_rate": 5.137419632092886e-06, + "loss": 0.26469242572784424, + "step": 2748 + }, + { + "epoch": 1.3583343630297788, + "grad_norm": 1.217607994018294, + "learning_rate": 5.130281667880774e-06, + "loss": 0.26241326332092285, + "step": 2749 + }, + { + "epoch": 1.358828617323613, + "grad_norm": 1.375829317891462, + "learning_rate": 5.123146954623038e-06, + "loss": 0.2674810290336609, + "step": 2750 + }, + { + "epoch": 1.3593228716174472, + "grad_norm": 1.3872924823998294, + "learning_rate": 5.116015497082719e-06, + "loss": 0.23186063766479492, + "step": 2751 + }, + { + "epoch": 1.3598171259112815, + "grad_norm": 1.3207469475464653, + "learning_rate": 5.108887300020669e-06, + "loss": 
0.2794165313243866, + "step": 2752 + }, + { + "epoch": 1.3603113802051157, + "grad_norm": 1.2682065300683938, + "learning_rate": 5.1017623681955705e-06, + "loss": 0.25263023376464844, + "step": 2753 + }, + { + "epoch": 1.3608056344989496, + "grad_norm": 1.385223404499901, + "learning_rate": 5.0946407063639315e-06, + "loss": 0.2503500282764435, + "step": 2754 + }, + { + "epoch": 1.3612998887927839, + "grad_norm": 1.1490078969357793, + "learning_rate": 5.087522319280061e-06, + "loss": 0.21871569752693176, + "step": 2755 + }, + { + "epoch": 1.361794143086618, + "grad_norm": 1.3919853358310244, + "learning_rate": 5.080407211696103e-06, + "loss": 0.2790142893791199, + "step": 2756 + }, + { + "epoch": 1.3622883973804523, + "grad_norm": 1.3837841689522787, + "learning_rate": 5.073295388362003e-06, + "loss": 0.27197304368019104, + "step": 2757 + }, + { + "epoch": 1.3627826516742865, + "grad_norm": 1.3248855835987599, + "learning_rate": 5.066186854025502e-06, + "loss": 0.2402152568101883, + "step": 2758 + }, + { + "epoch": 1.3632769059681207, + "grad_norm": 1.3193984824612894, + "learning_rate": 5.059081613432162e-06, + "loss": 0.24418887495994568, + "step": 2759 + }, + { + "epoch": 1.3637711602619547, + "grad_norm": 1.1840901033348532, + "learning_rate": 5.05197967132534e-06, + "loss": 0.2239491045475006, + "step": 2760 + }, + { + "epoch": 1.3642654145557889, + "grad_norm": 1.3401183348354848, + "learning_rate": 5.044881032446192e-06, + "loss": 0.25177091360092163, + "step": 2761 + }, + { + "epoch": 1.364759668849623, + "grad_norm": 1.2524679914953787, + "learning_rate": 5.0377857015336655e-06, + "loss": 0.25462138652801514, + "step": 2762 + }, + { + "epoch": 1.3652539231434573, + "grad_norm": 1.154660335850044, + "learning_rate": 5.0306936833245034e-06, + "loss": 0.21030092239379883, + "step": 2763 + }, + { + "epoch": 1.3657481774372915, + "grad_norm": 1.2778480955324765, + "learning_rate": 5.0236049825532355e-06, + "loss": 0.24033348262310028, + "step": 2764 + }, + { + 
"epoch": 1.3662424317311257, + "grad_norm": 1.2874693424331807, + "learning_rate": 5.016519603952177e-06, + "loss": 0.20803815126419067, + "step": 2765 + }, + { + "epoch": 1.36673668602496, + "grad_norm": 1.3360777408248645, + "learning_rate": 5.00943755225143e-06, + "loss": 0.21589599549770355, + "step": 2766 + }, + { + "epoch": 1.367230940318794, + "grad_norm": 1.3112690340132882, + "learning_rate": 5.00235883217886e-06, + "loss": 0.2690975069999695, + "step": 2767 + }, + { + "epoch": 1.3677251946126283, + "grad_norm": 1.395793399890879, + "learning_rate": 4.995283448460131e-06, + "loss": 0.2368423044681549, + "step": 2768 + }, + { + "epoch": 1.3682194489064623, + "grad_norm": 1.428306560095472, + "learning_rate": 4.988211405818661e-06, + "loss": 0.2801262140274048, + "step": 2769 + }, + { + "epoch": 1.3687137032002965, + "grad_norm": 1.4209027545437471, + "learning_rate": 4.981142708975647e-06, + "loss": 0.2777586877346039, + "step": 2770 + }, + { + "epoch": 1.3692079574941307, + "grad_norm": 1.1921679323806382, + "learning_rate": 4.97407736265005e-06, + "loss": 0.2400980144739151, + "step": 2771 + }, + { + "epoch": 1.369702211787965, + "grad_norm": 1.233538906022963, + "learning_rate": 4.967015371558592e-06, + "loss": 0.2513861358165741, + "step": 2772 + }, + { + "epoch": 1.3701964660817991, + "grad_norm": 1.2944813845771217, + "learning_rate": 4.959956740415761e-06, + "loss": 0.2785816490650177, + "step": 2773 + }, + { + "epoch": 1.3706907203756333, + "grad_norm": 1.456856079389265, + "learning_rate": 4.9529014739338e-06, + "loss": 0.29092347621917725, + "step": 2774 + }, + { + "epoch": 1.3711849746694673, + "grad_norm": 1.3133832748237033, + "learning_rate": 4.945849576822693e-06, + "loss": 0.27067384123802185, + "step": 2775 + }, + { + "epoch": 1.3716792289633015, + "grad_norm": 1.3000530351478699, + "learning_rate": 4.938801053790199e-06, + "loss": 0.21500205993652344, + "step": 2776 + }, + { + "epoch": 1.3721734832571357, + "grad_norm": 1.2838621226635265, 
+ "learning_rate": 4.931755909541808e-06, + "loss": 0.2422936111688614, + "step": 2777 + }, + { + "epoch": 1.37266773755097, + "grad_norm": 1.3694112071584477, + "learning_rate": 4.9247141487807515e-06, + "loss": 0.2760060727596283, + "step": 2778 + }, + { + "epoch": 1.3731619918448041, + "grad_norm": 1.39746625445185, + "learning_rate": 4.917675776208013e-06, + "loss": 0.22626326978206635, + "step": 2779 + }, + { + "epoch": 1.3736562461386383, + "grad_norm": 1.34096746485375, + "learning_rate": 4.910640796522308e-06, + "loss": 0.23023411631584167, + "step": 2780 + }, + { + "epoch": 1.3741505004324726, + "grad_norm": 1.29137003736815, + "learning_rate": 4.903609214420088e-06, + "loss": 0.22157053649425507, + "step": 2781 + }, + { + "epoch": 1.3746447547263068, + "grad_norm": 1.1801851543310786, + "learning_rate": 4.89658103459554e-06, + "loss": 0.24125584959983826, + "step": 2782 + }, + { + "epoch": 1.375139009020141, + "grad_norm": 1.3517508821088553, + "learning_rate": 4.889556261740578e-06, + "loss": 0.26294079422950745, + "step": 2783 + }, + { + "epoch": 1.3756332633139752, + "grad_norm": 1.2726719724151299, + "learning_rate": 4.882534900544829e-06, + "loss": 0.25327497720718384, + "step": 2784 + }, + { + "epoch": 1.3761275176078092, + "grad_norm": 1.2868199846308948, + "learning_rate": 4.875516955695663e-06, + "loss": 0.2716723084449768, + "step": 2785 + }, + { + "epoch": 1.3766217719016434, + "grad_norm": 1.4619117882899046, + "learning_rate": 4.8685024318781615e-06, + "loss": 0.2889532446861267, + "step": 2786 + }, + { + "epoch": 1.3771160261954776, + "grad_norm": 1.2622088454697893, + "learning_rate": 4.861491333775114e-06, + "loss": 0.23743030428886414, + "step": 2787 + }, + { + "epoch": 1.3776102804893118, + "grad_norm": 1.2912517641324606, + "learning_rate": 4.8544836660670305e-06, + "loss": 0.27180567383766174, + "step": 2788 + }, + { + "epoch": 1.378104534783146, + "grad_norm": 1.3376004646586275, + "learning_rate": 4.847479433432131e-06, + "loss": 
0.2549944221973419, + "step": 2789 + }, + { + "epoch": 1.37859878907698, + "grad_norm": 1.189305404121555, + "learning_rate": 4.8404786405463414e-06, + "loss": 0.24112319946289062, + "step": 2790 + }, + { + "epoch": 1.3790930433708142, + "grad_norm": 1.1833978049698726, + "learning_rate": 4.833481292083291e-06, + "loss": 0.22865869104862213, + "step": 2791 + }, + { + "epoch": 1.3795872976646484, + "grad_norm": 1.268697923498799, + "learning_rate": 4.82648739271431e-06, + "loss": 0.24851003289222717, + "step": 2792 + }, + { + "epoch": 1.3800815519584826, + "grad_norm": 1.2931223721765053, + "learning_rate": 4.819496947108424e-06, + "loss": 0.251456081867218, + "step": 2793 + }, + { + "epoch": 1.3805758062523168, + "grad_norm": 1.4758961733623657, + "learning_rate": 4.81250995993236e-06, + "loss": 0.31711041927337646, + "step": 2794 + }, + { + "epoch": 1.381070060546151, + "grad_norm": 1.3291779254725478, + "learning_rate": 4.805526435850523e-06, + "loss": 0.2204340100288391, + "step": 2795 + }, + { + "epoch": 1.3815643148399852, + "grad_norm": 1.2784619373678463, + "learning_rate": 4.798546379525013e-06, + "loss": 0.26289406418800354, + "step": 2796 + }, + { + "epoch": 1.3820585691338194, + "grad_norm": 1.28320111492484, + "learning_rate": 4.7915697956156284e-06, + "loss": 0.24830611050128937, + "step": 2797 + }, + { + "epoch": 1.3825528234276536, + "grad_norm": 1.2879657785107324, + "learning_rate": 4.784596688779825e-06, + "loss": 0.24792183935642242, + "step": 2798 + }, + { + "epoch": 1.3830470777214878, + "grad_norm": 1.2696074389245717, + "learning_rate": 4.777627063672753e-06, + "loss": 0.2689560651779175, + "step": 2799 + }, + { + "epoch": 1.3835413320153218, + "grad_norm": 1.3225545388421776, + "learning_rate": 4.770660924947238e-06, + "loss": 0.24323254823684692, + "step": 2800 + }, + { + "epoch": 1.384035586309156, + "grad_norm": 1.4076671335254063, + "learning_rate": 4.7636982772537645e-06, + "loss": 0.24404528737068176, + "step": 2801 + }, + { + "epoch": 
1.3845298406029902, + "grad_norm": 1.203765816908177, + "learning_rate": 4.7567391252405075e-06, + "loss": 0.23512448370456696, + "step": 2802 + }, + { + "epoch": 1.3850240948968244, + "grad_norm": 1.5018331188451308, + "learning_rate": 4.749783473553297e-06, + "loss": 0.26446110010147095, + "step": 2803 + }, + { + "epoch": 1.3855183491906586, + "grad_norm": 1.408580468005289, + "learning_rate": 4.742831326835618e-06, + "loss": 0.24630968272686005, + "step": 2804 + }, + { + "epoch": 1.3860126034844928, + "grad_norm": 1.3358261514200123, + "learning_rate": 4.735882689728628e-06, + "loss": 0.253492146730423, + "step": 2805 + }, + { + "epoch": 1.3865068577783268, + "grad_norm": 1.3501776737603972, + "learning_rate": 4.7289375668711444e-06, + "loss": 0.271090567111969, + "step": 2806 + }, + { + "epoch": 1.387001112072161, + "grad_norm": 1.278147407656648, + "learning_rate": 4.721995962899625e-06, + "loss": 0.24045832455158234, + "step": 2807 + }, + { + "epoch": 1.3874953663659952, + "grad_norm": 1.3482420589650876, + "learning_rate": 4.715057882448187e-06, + "loss": 0.2525935471057892, + "step": 2808 + }, + { + "epoch": 1.3879896206598294, + "grad_norm": 1.6416013674407632, + "learning_rate": 4.708123330148593e-06, + "loss": 0.30852392315864563, + "step": 2809 + }, + { + "epoch": 1.3884838749536637, + "grad_norm": 1.4379358472073636, + "learning_rate": 4.701192310630253e-06, + "loss": 0.2770250737667084, + "step": 2810 + }, + { + "epoch": 1.3889781292474979, + "grad_norm": 1.3872314722590495, + "learning_rate": 4.6942648285202154e-06, + "loss": 0.29135680198669434, + "step": 2811 + }, + { + "epoch": 1.389472383541332, + "grad_norm": 1.3561535153102244, + "learning_rate": 4.687340888443171e-06, + "loss": 0.26933860778808594, + "step": 2812 + }, + { + "epoch": 1.3899666378351663, + "grad_norm": 1.3589820356083573, + "learning_rate": 4.680420495021436e-06, + "loss": 0.26089105010032654, + "step": 2813 + }, + { + "epoch": 1.3904608921290005, + "grad_norm": 
1.446680212777315, + "learning_rate": 4.673503652874977e-06, + "loss": 0.26031410694122314, + "step": 2814 + }, + { + "epoch": 1.3909551464228345, + "grad_norm": 1.4223445911905375, + "learning_rate": 4.6665903666213685e-06, + "loss": 0.2887076139450073, + "step": 2815 + }, + { + "epoch": 1.3914494007166687, + "grad_norm": 1.4125652827001185, + "learning_rate": 4.6596806408758275e-06, + "loss": 0.2360706925392151, + "step": 2816 + }, + { + "epoch": 1.3919436550105029, + "grad_norm": 1.2857689419175287, + "learning_rate": 4.652774480251186e-06, + "loss": 0.22275522351264954, + "step": 2817 + }, + { + "epoch": 1.392437909304337, + "grad_norm": 1.4433288432295395, + "learning_rate": 4.645871889357899e-06, + "loss": 0.2425977736711502, + "step": 2818 + }, + { + "epoch": 1.3929321635981713, + "grad_norm": 1.3257241152583827, + "learning_rate": 4.638972872804038e-06, + "loss": 0.25219830870628357, + "step": 2819 + }, + { + "epoch": 1.3934264178920055, + "grad_norm": 1.3749035761313395, + "learning_rate": 4.6320774351952916e-06, + "loss": 0.28060346841812134, + "step": 2820 + }, + { + "epoch": 1.3939206721858395, + "grad_norm": 1.2003147708990263, + "learning_rate": 4.625185581134942e-06, + "loss": 0.2395240217447281, + "step": 2821 + }, + { + "epoch": 1.3944149264796737, + "grad_norm": 1.1704641579429333, + "learning_rate": 4.618297315223906e-06, + "loss": 0.23622646927833557, + "step": 2822 + }, + { + "epoch": 1.394909180773508, + "grad_norm": 1.2829625624138312, + "learning_rate": 4.611412642060692e-06, + "loss": 0.2189474105834961, + "step": 2823 + }, + { + "epoch": 1.395403435067342, + "grad_norm": 1.433264639271618, + "learning_rate": 4.6045315662414e-06, + "loss": 0.266002357006073, + "step": 2824 + }, + { + "epoch": 1.3958976893611763, + "grad_norm": 1.3252437693414834, + "learning_rate": 4.5976540923597425e-06, + "loss": 0.2402176856994629, + "step": 2825 + }, + { + "epoch": 1.3963919436550105, + "grad_norm": 1.359969321526994, + "learning_rate": 
4.5907802250070235e-06, + "loss": 0.2493474781513214, + "step": 2826 + }, + { + "epoch": 1.3968861979488447, + "grad_norm": 1.41117190363675, + "learning_rate": 4.583909968772137e-06, + "loss": 0.25716543197631836, + "step": 2827 + }, + { + "epoch": 1.397380452242679, + "grad_norm": 1.2726969842984424, + "learning_rate": 4.57704332824157e-06, + "loss": 0.29470473527908325, + "step": 2828 + }, + { + "epoch": 1.3978747065365131, + "grad_norm": 1.3349562969336177, + "learning_rate": 4.570180307999394e-06, + "loss": 0.28095656633377075, + "step": 2829 + }, + { + "epoch": 1.3983689608303473, + "grad_norm": 1.3296802970374444, + "learning_rate": 4.563320912627256e-06, + "loss": 0.2351825088262558, + "step": 2830 + }, + { + "epoch": 1.3988632151241813, + "grad_norm": 1.378245480597285, + "learning_rate": 4.556465146704399e-06, + "loss": 0.25859856605529785, + "step": 2831 + }, + { + "epoch": 1.3993574694180155, + "grad_norm": 1.3122509634402246, + "learning_rate": 4.549613014807637e-06, + "loss": 0.2503181993961334, + "step": 2832 + }, + { + "epoch": 1.3998517237118497, + "grad_norm": 1.4164889794081637, + "learning_rate": 4.542764521511345e-06, + "loss": 0.26368820667266846, + "step": 2833 + }, + { + "epoch": 1.400345978005684, + "grad_norm": 1.2584462742908673, + "learning_rate": 4.535919671387483e-06, + "loss": 0.24077676236629486, + "step": 2834 + }, + { + "epoch": 1.4008402322995182, + "grad_norm": 1.3906309875331755, + "learning_rate": 4.529078469005577e-06, + "loss": 0.27042093873023987, + "step": 2835 + }, + { + "epoch": 1.4013344865933524, + "grad_norm": 1.3047899471845867, + "learning_rate": 4.5222409189327155e-06, + "loss": 0.2731306552886963, + "step": 2836 + }, + { + "epoch": 1.4018287408871863, + "grad_norm": 1.293016022457822, + "learning_rate": 4.515407025733548e-06, + "loss": 0.2925037741661072, + "step": 2837 + }, + { + "epoch": 1.4023229951810205, + "grad_norm": 1.3019226114538747, + "learning_rate": 4.508576793970285e-06, + "loss": 0.2927025556564331, 
+ "step": 2838 + }, + { + "epoch": 1.4028172494748548, + "grad_norm": 1.2637397509173496, + "learning_rate": 4.5017502282026926e-06, + "loss": 0.26285338401794434, + "step": 2839 + }, + { + "epoch": 1.403311503768689, + "grad_norm": 1.3147900807622677, + "learning_rate": 4.49492733298809e-06, + "loss": 0.22698873281478882, + "step": 2840 + }, + { + "epoch": 1.4038057580625232, + "grad_norm": 1.3171706155487821, + "learning_rate": 4.488108112881339e-06, + "loss": 0.24116170406341553, + "step": 2841 + }, + { + "epoch": 1.4043000123563574, + "grad_norm": 1.57472275672956, + "learning_rate": 4.481292572434852e-06, + "loss": 0.3211704194545746, + "step": 2842 + }, + { + "epoch": 1.4047942666501916, + "grad_norm": 1.3631722904804857, + "learning_rate": 4.474480716198598e-06, + "loss": 0.26634523272514343, + "step": 2843 + }, + { + "epoch": 1.4052885209440258, + "grad_norm": 1.2801660794508798, + "learning_rate": 4.467672548720066e-06, + "loss": 0.24751242995262146, + "step": 2844 + }, + { + "epoch": 1.40578277523786, + "grad_norm": 1.2023997182117507, + "learning_rate": 4.4608680745442915e-06, + "loss": 0.22031354904174805, + "step": 2845 + }, + { + "epoch": 1.406277029531694, + "grad_norm": 1.4549549871552898, + "learning_rate": 4.454067298213847e-06, + "loss": 0.2474634051322937, + "step": 2846 + }, + { + "epoch": 1.4067712838255282, + "grad_norm": 1.2925543429398942, + "learning_rate": 4.4472702242688315e-06, + "loss": 0.2494845986366272, + "step": 2847 + }, + { + "epoch": 1.4072655381193624, + "grad_norm": 1.246615378915442, + "learning_rate": 4.440476857246876e-06, + "loss": 0.23150494694709778, + "step": 2848 + }, + { + "epoch": 1.4077597924131966, + "grad_norm": 1.3473585855048795, + "learning_rate": 4.433687201683138e-06, + "loss": 0.2093413770198822, + "step": 2849 + }, + { + "epoch": 1.4082540467070308, + "grad_norm": 1.4247715723132508, + "learning_rate": 4.426901262110287e-06, + "loss": 0.26741865277290344, + "step": 2850 + }, + { + "epoch": 
1.408748301000865, + "grad_norm": 1.3965732526570211, + "learning_rate": 4.420119043058521e-06, + "loss": 0.2599044740200043, + "step": 2851 + }, + { + "epoch": 1.409242555294699, + "grad_norm": 1.37695062225065, + "learning_rate": 4.413340549055562e-06, + "loss": 0.26934683322906494, + "step": 2852 + }, + { + "epoch": 1.4097368095885332, + "grad_norm": 1.247550824996485, + "learning_rate": 4.4065657846266255e-06, + "loss": 0.2609720528125763, + "step": 2853 + }, + { + "epoch": 1.4102310638823674, + "grad_norm": 1.3034094501092508, + "learning_rate": 4.39979475429445e-06, + "loss": 0.23431813716888428, + "step": 2854 + }, + { + "epoch": 1.4107253181762016, + "grad_norm": 1.5127417165274348, + "learning_rate": 4.39302746257928e-06, + "loss": 0.2791878581047058, + "step": 2855 + }, + { + "epoch": 1.4112195724700358, + "grad_norm": 1.445393105302077, + "learning_rate": 4.386263913998862e-06, + "loss": 0.30482247471809387, + "step": 2856 + }, + { + "epoch": 1.41171382676387, + "grad_norm": 1.517774336378155, + "learning_rate": 4.379504113068445e-06, + "loss": 0.24561305344104767, + "step": 2857 + }, + { + "epoch": 1.4122080810577042, + "grad_norm": 1.2686201180133903, + "learning_rate": 4.372748064300777e-06, + "loss": 0.23973286151885986, + "step": 2858 + }, + { + "epoch": 1.4127023353515384, + "grad_norm": 1.2884315615066577, + "learning_rate": 4.365995772206092e-06, + "loss": 0.26788556575775146, + "step": 2859 + }, + { + "epoch": 1.4131965896453726, + "grad_norm": 1.2479985472864645, + "learning_rate": 4.359247241292136e-06, + "loss": 0.22432288527488708, + "step": 2860 + }, + { + "epoch": 1.4136908439392069, + "grad_norm": 1.4071442664764462, + "learning_rate": 4.352502476064121e-06, + "loss": 0.282687783241272, + "step": 2861 + }, + { + "epoch": 1.4141850982330408, + "grad_norm": 1.350175603929749, + "learning_rate": 4.345761481024761e-06, + "loss": 0.2516692578792572, + "step": 2862 + }, + { + "epoch": 1.414679352526875, + "grad_norm": 1.3813903906983658, + 
"learning_rate": 4.3390242606742465e-06, + "loss": 0.2473583221435547, + "step": 2863 + }, + { + "epoch": 1.4151736068207093, + "grad_norm": 1.365125849897862, + "learning_rate": 4.33229081951025e-06, + "loss": 0.24372908473014832, + "step": 2864 + }, + { + "epoch": 1.4156678611145435, + "grad_norm": 1.935117633937839, + "learning_rate": 4.325561162027922e-06, + "loss": 0.2877897024154663, + "step": 2865 + }, + { + "epoch": 1.4161621154083777, + "grad_norm": 1.3789670558806315, + "learning_rate": 4.318835292719886e-06, + "loss": 0.2554720342159271, + "step": 2866 + }, + { + "epoch": 1.4166563697022119, + "grad_norm": 1.400243578908533, + "learning_rate": 4.312113216076228e-06, + "loss": 0.26695260405540466, + "step": 2867 + }, + { + "epoch": 1.4171506239960459, + "grad_norm": 1.310264039945657, + "learning_rate": 4.305394936584522e-06, + "loss": 0.26983851194381714, + "step": 2868 + }, + { + "epoch": 1.41764487828988, + "grad_norm": 1.4664847959785403, + "learning_rate": 4.298680458729793e-06, + "loss": 0.303170382976532, + "step": 2869 + }, + { + "epoch": 1.4181391325837143, + "grad_norm": 1.2870012899484584, + "learning_rate": 4.2919697869945234e-06, + "loss": 0.23217584192752838, + "step": 2870 + }, + { + "epoch": 1.4186333868775485, + "grad_norm": 1.3723703910904035, + "learning_rate": 4.285262925858663e-06, + "loss": 0.2895517349243164, + "step": 2871 + }, + { + "epoch": 1.4191276411713827, + "grad_norm": 1.3083324921698822, + "learning_rate": 4.278559879799628e-06, + "loss": 0.24025630950927734, + "step": 2872 + }, + { + "epoch": 1.4196218954652169, + "grad_norm": 1.2827271091784578, + "learning_rate": 4.271860653292263e-06, + "loss": 0.22810839116573334, + "step": 2873 + }, + { + "epoch": 1.420116149759051, + "grad_norm": 1.3806208017840322, + "learning_rate": 4.26516525080888e-06, + "loss": 0.266724169254303, + "step": 2874 + }, + { + "epoch": 1.4206104040528853, + "grad_norm": 1.225057219675358, + "learning_rate": 4.25847367681924e-06, + "loss": 
0.22618745267391205, + "step": 2875 + }, + { + "epoch": 1.4211046583467195, + "grad_norm": 1.2369737958102245, + "learning_rate": 4.251785935790529e-06, + "loss": 0.2239789217710495, + "step": 2876 + }, + { + "epoch": 1.4215989126405535, + "grad_norm": 1.4266723106614325, + "learning_rate": 4.245102032187399e-06, + "loss": 0.21519358456134796, + "step": 2877 + }, + { + "epoch": 1.4220931669343877, + "grad_norm": 1.3543349519259755, + "learning_rate": 4.2384219704719284e-06, + "loss": 0.31226712465286255, + "step": 2878 + }, + { + "epoch": 1.422587421228222, + "grad_norm": 1.56763311196269, + "learning_rate": 4.231745755103625e-06, + "loss": 0.26814836263656616, + "step": 2879 + }, + { + "epoch": 1.423081675522056, + "grad_norm": 1.340943129837897, + "learning_rate": 4.225073390539436e-06, + "loss": 0.2369621843099594, + "step": 2880 + }, + { + "epoch": 1.4235759298158903, + "grad_norm": 1.4174455321042607, + "learning_rate": 4.218404881233737e-06, + "loss": 0.2556746304035187, + "step": 2881 + }, + { + "epoch": 1.4240701841097245, + "grad_norm": 1.4008574237374047, + "learning_rate": 4.2117402316383314e-06, + "loss": 0.25875598192214966, + "step": 2882 + }, + { + "epoch": 1.4245644384035585, + "grad_norm": 1.3837412182941131, + "learning_rate": 4.205079446202443e-06, + "loss": 0.26839762926101685, + "step": 2883 + }, + { + "epoch": 1.4250586926973927, + "grad_norm": 1.3404796422391116, + "learning_rate": 4.198422529372717e-06, + "loss": 0.2764383554458618, + "step": 2884 + }, + { + "epoch": 1.425552946991227, + "grad_norm": 1.6233600341280843, + "learning_rate": 4.191769485593216e-06, + "loss": 0.24517112970352173, + "step": 2885 + }, + { + "epoch": 1.4260472012850611, + "grad_norm": 1.2960278491651354, + "learning_rate": 4.18512031930542e-06, + "loss": 0.21880990266799927, + "step": 2886 + }, + { + "epoch": 1.4265414555788953, + "grad_norm": 1.25547495232964, + "learning_rate": 4.178475034948212e-06, + "loss": 0.24671246111392975, + "step": 2887 + }, + { + 
"epoch": 1.4270357098727295, + "grad_norm": 1.3321806455697769, + "learning_rate": 4.171833636957886e-06, + "loss": 0.25473371148109436, + "step": 2888 + }, + { + "epoch": 1.4275299641665637, + "grad_norm": 1.2832708163920512, + "learning_rate": 4.1651961297681574e-06, + "loss": 0.2675618529319763, + "step": 2889 + }, + { + "epoch": 1.428024218460398, + "grad_norm": 1.361777795281808, + "learning_rate": 4.15856251781012e-06, + "loss": 0.24357986450195312, + "step": 2890 + }, + { + "epoch": 1.4285184727542322, + "grad_norm": 1.360475333723739, + "learning_rate": 4.1519328055122825e-06, + "loss": 0.2668409049510956, + "step": 2891 + }, + { + "epoch": 1.4290127270480664, + "grad_norm": 1.237397304360782, + "learning_rate": 4.145306997300543e-06, + "loss": 0.24507637321949005, + "step": 2892 + }, + { + "epoch": 1.4295069813419004, + "grad_norm": 1.366253286129835, + "learning_rate": 4.1386850975982e-06, + "loss": 0.2791709899902344, + "step": 2893 + }, + { + "epoch": 1.4300012356357346, + "grad_norm": 1.2339989570889298, + "learning_rate": 4.132067110825939e-06, + "loss": 0.24982133507728577, + "step": 2894 + }, + { + "epoch": 1.4304954899295688, + "grad_norm": 1.4357848897595227, + "learning_rate": 4.125453041401835e-06, + "loss": 0.2814679741859436, + "step": 2895 + }, + { + "epoch": 1.430989744223403, + "grad_norm": 1.2447298736764703, + "learning_rate": 4.118842893741336e-06, + "loss": 0.22699782252311707, + "step": 2896 + }, + { + "epoch": 1.4314839985172372, + "grad_norm": 1.9366220135779266, + "learning_rate": 4.112236672257294e-06, + "loss": 0.23297230899333954, + "step": 2897 + }, + { + "epoch": 1.4319782528110712, + "grad_norm": 1.4169021772429402, + "learning_rate": 4.1056343813599265e-06, + "loss": 0.26085159182548523, + "step": 2898 + }, + { + "epoch": 1.4324725071049054, + "grad_norm": 1.2947699028454482, + "learning_rate": 4.0990360254568216e-06, + "loss": 0.27813559770584106, + "step": 2899 + }, + { + "epoch": 1.4329667613987396, + "grad_norm": 
1.4648322974961994, + "learning_rate": 4.092441608952953e-06, + "loss": 0.2821611762046814, + "step": 2900 + }, + { + "epoch": 1.4334610156925738, + "grad_norm": 1.4262304528738896, + "learning_rate": 4.085851136250657e-06, + "loss": 0.25223150849342346, + "step": 2901 + }, + { + "epoch": 1.433955269986408, + "grad_norm": 1.2236760469459784, + "learning_rate": 4.079264611749639e-06, + "loss": 0.225361630320549, + "step": 2902 + }, + { + "epoch": 1.4344495242802422, + "grad_norm": 1.2980114377261416, + "learning_rate": 4.07268203984697e-06, + "loss": 0.2564583420753479, + "step": 2903 + }, + { + "epoch": 1.4349437785740764, + "grad_norm": 1.618238680371033, + "learning_rate": 4.066103424937083e-06, + "loss": 0.2433827817440033, + "step": 2904 + }, + { + "epoch": 1.4354380328679106, + "grad_norm": 1.326779755851318, + "learning_rate": 4.059528771411758e-06, + "loss": 0.26073208451271057, + "step": 2905 + }, + { + "epoch": 1.4359322871617448, + "grad_norm": 1.381783420476221, + "learning_rate": 4.052958083660153e-06, + "loss": 0.2937609553337097, + "step": 2906 + }, + { + "epoch": 1.436426541455579, + "grad_norm": 1.2248682484343931, + "learning_rate": 4.046391366068756e-06, + "loss": 0.22026552259922028, + "step": 2907 + }, + { + "epoch": 1.436920795749413, + "grad_norm": 1.2471555303405935, + "learning_rate": 4.039828623021415e-06, + "loss": 0.21137471497058868, + "step": 2908 + }, + { + "epoch": 1.4374150500432472, + "grad_norm": 1.316365476590171, + "learning_rate": 4.033269858899324e-06, + "loss": 0.23597699403762817, + "step": 2909 + }, + { + "epoch": 1.4379093043370814, + "grad_norm": 1.3166979356724768, + "learning_rate": 4.026715078081023e-06, + "loss": 0.2667025923728943, + "step": 2910 + }, + { + "epoch": 1.4384035586309156, + "grad_norm": 1.2942746954451143, + "learning_rate": 4.020164284942387e-06, + "loss": 0.2789616584777832, + "step": 2911 + }, + { + "epoch": 1.4388978129247498, + "grad_norm": 1.2105601579452838, + "learning_rate": 
4.013617483856637e-06, + "loss": 0.23176617920398712, + "step": 2912 + }, + { + "epoch": 1.439392067218584, + "grad_norm": 1.3989428986083243, + "learning_rate": 4.007074679194313e-06, + "loss": 0.2814248204231262, + "step": 2913 + }, + { + "epoch": 1.439886321512418, + "grad_norm": 1.7399518805726892, + "learning_rate": 4.000535875323307e-06, + "loss": 0.26201730966567993, + "step": 2914 + }, + { + "epoch": 1.4403805758062522, + "grad_norm": 1.3752450122135709, + "learning_rate": 3.994001076608833e-06, + "loss": 0.22517681121826172, + "step": 2915 + }, + { + "epoch": 1.4408748301000864, + "grad_norm": 1.2576751634156127, + "learning_rate": 3.9874702874134205e-06, + "loss": 0.25220564007759094, + "step": 2916 + }, + { + "epoch": 1.4413690843939206, + "grad_norm": 1.3128506030513347, + "learning_rate": 3.980943512096934e-06, + "loss": 0.23441332578659058, + "step": 2917 + }, + { + "epoch": 1.4418633386877548, + "grad_norm": 1.1616125895518352, + "learning_rate": 3.9744207550165625e-06, + "loss": 0.21659764647483826, + "step": 2918 + }, + { + "epoch": 1.442357592981589, + "grad_norm": 1.3726974417027011, + "learning_rate": 3.967902020526797e-06, + "loss": 0.21888667345046997, + "step": 2919 + }, + { + "epoch": 1.4428518472754233, + "grad_norm": 2.445936326011648, + "learning_rate": 3.961387312979454e-06, + "loss": 0.2771157920360565, + "step": 2920 + }, + { + "epoch": 1.4433461015692575, + "grad_norm": 1.312047281106489, + "learning_rate": 3.9548766367236605e-06, + "loss": 0.21376901865005493, + "step": 2921 + }, + { + "epoch": 1.4438403558630917, + "grad_norm": 1.4472763394283668, + "learning_rate": 3.948369996105849e-06, + "loss": 0.2888128161430359, + "step": 2922 + }, + { + "epoch": 1.4443346101569257, + "grad_norm": 1.327788891714265, + "learning_rate": 3.941867395469761e-06, + "loss": 0.27809786796569824, + "step": 2923 + }, + { + "epoch": 1.4448288644507599, + "grad_norm": 1.377899507369851, + "learning_rate": 3.935368839156443e-06, + "loss": 
0.2573625445365906, + "step": 2924 + }, + { + "epoch": 1.445323118744594, + "grad_norm": 1.5375959387987326, + "learning_rate": 3.928874331504232e-06, + "loss": 0.21472841501235962, + "step": 2925 + }, + { + "epoch": 1.4458173730384283, + "grad_norm": 1.2616393731465387, + "learning_rate": 3.922383876848771e-06, + "loss": 0.23214091360569, + "step": 2926 + }, + { + "epoch": 1.4463116273322625, + "grad_norm": 1.2717196020996628, + "learning_rate": 3.915897479522995e-06, + "loss": 0.23830139636993408, + "step": 2927 + }, + { + "epoch": 1.4468058816260967, + "grad_norm": 1.306053937449173, + "learning_rate": 3.909415143857132e-06, + "loss": 0.2519805431365967, + "step": 2928 + }, + { + "epoch": 1.4473001359199307, + "grad_norm": 1.3548983452054761, + "learning_rate": 3.9029368741786935e-06, + "loss": 0.2191445231437683, + "step": 2929 + }, + { + "epoch": 1.4477943902137649, + "grad_norm": 1.2448486288410623, + "learning_rate": 3.896462674812482e-06, + "loss": 0.2267228364944458, + "step": 2930 + }, + { + "epoch": 1.448288644507599, + "grad_norm": 1.3302096442776044, + "learning_rate": 3.88999255008058e-06, + "loss": 0.26456522941589355, + "step": 2931 + }, + { + "epoch": 1.4487828988014333, + "grad_norm": 1.3729869343228434, + "learning_rate": 3.883526504302353e-06, + "loss": 0.25602713227272034, + "step": 2932 + }, + { + "epoch": 1.4492771530952675, + "grad_norm": 1.9847312680384686, + "learning_rate": 3.877064541794435e-06, + "loss": 0.2545332610607147, + "step": 2933 + }, + { + "epoch": 1.4497714073891017, + "grad_norm": 1.3785644388388194, + "learning_rate": 3.87060666687074e-06, + "loss": 0.2846388816833496, + "step": 2934 + }, + { + "epoch": 1.450265661682936, + "grad_norm": 1.4353094721790403, + "learning_rate": 3.864152883842461e-06, + "loss": 0.2686496376991272, + "step": 2935 + }, + { + "epoch": 1.4507599159767701, + "grad_norm": 1.2943779410551872, + "learning_rate": 3.857703197018044e-06, + "loss": 0.2712322473526001, + "step": 2936 + }, + { + "epoch": 
1.4512541702706043, + "grad_norm": 1.3542096863749147, + "learning_rate": 3.851257610703209e-06, + "loss": 0.23492589592933655, + "step": 2937 + }, + { + "epoch": 1.4517484245644385, + "grad_norm": 1.2747230322582852, + "learning_rate": 3.84481612920094e-06, + "loss": 0.274332731962204, + "step": 2938 + }, + { + "epoch": 1.4522426788582725, + "grad_norm": 1.4107112786506069, + "learning_rate": 3.838378756811475e-06, + "loss": 0.250995010137558, + "step": 2939 + }, + { + "epoch": 1.4527369331521067, + "grad_norm": 1.3749429977256393, + "learning_rate": 3.831945497832313e-06, + "loss": 0.25221261382102966, + "step": 2940 + }, + { + "epoch": 1.453231187445941, + "grad_norm": 1.4826415922959744, + "learning_rate": 3.825516356558211e-06, + "loss": 0.2549906075000763, + "step": 2941 + }, + { + "epoch": 1.4537254417397751, + "grad_norm": 1.296751596925164, + "learning_rate": 3.819091337281158e-06, + "loss": 0.2369248867034912, + "step": 2942 + }, + { + "epoch": 1.4542196960336093, + "grad_norm": 1.3057816538242708, + "learning_rate": 3.8126704442904182e-06, + "loss": 0.23681433498859406, + "step": 2943 + }, + { + "epoch": 1.4547139503274436, + "grad_norm": 1.237019268284654, + "learning_rate": 3.806253681872486e-06, + "loss": 0.24966523051261902, + "step": 2944 + }, + { + "epoch": 1.4552082046212775, + "grad_norm": 1.4768369352256168, + "learning_rate": 3.7998410543110954e-06, + "loss": 0.28130626678466797, + "step": 2945 + }, + { + "epoch": 1.4557024589151117, + "grad_norm": 1.3443210173277784, + "learning_rate": 3.7934325658872275e-06, + "loss": 0.2725732922554016, + "step": 2946 + }, + { + "epoch": 1.456196713208946, + "grad_norm": 1.3345618379823432, + "learning_rate": 3.7870282208790976e-06, + "loss": 0.23695361614227295, + "step": 2947 + }, + { + "epoch": 1.4566909675027802, + "grad_norm": 1.3094683367768178, + "learning_rate": 3.780628023562154e-06, + "loss": 0.2556610405445099, + "step": 2948 + }, + { + "epoch": 1.4571852217966144, + "grad_norm": 1.29841880424943, 
+ "learning_rate": 3.7742319782090786e-06, + "loss": 0.26012274622917175, + "step": 2949 + }, + { + "epoch": 1.4576794760904486, + "grad_norm": 1.4612114957138427, + "learning_rate": 3.7678400890897827e-06, + "loss": 0.23788896203041077, + "step": 2950 + }, + { + "epoch": 1.4581737303842828, + "grad_norm": 1.4390155766896275, + "learning_rate": 3.7614523604713894e-06, + "loss": 0.2927572727203369, + "step": 2951 + }, + { + "epoch": 1.458667984678117, + "grad_norm": 1.2435143086118214, + "learning_rate": 3.75506879661827e-06, + "loss": 0.2254970222711563, + "step": 2952 + }, + { + "epoch": 1.4591622389719512, + "grad_norm": 1.2816222898303182, + "learning_rate": 3.7486894017919883e-06, + "loss": 0.216854065656662, + "step": 2953 + }, + { + "epoch": 1.4596564932657852, + "grad_norm": 1.1833481657982283, + "learning_rate": 3.7423141802513417e-06, + "loss": 0.2505137026309967, + "step": 2954 + }, + { + "epoch": 1.4601507475596194, + "grad_norm": 1.2187582021965486, + "learning_rate": 3.735943136252337e-06, + "loss": 0.19780108332633972, + "step": 2955 + }, + { + "epoch": 1.4606450018534536, + "grad_norm": 1.482633837182769, + "learning_rate": 3.7295762740481923e-06, + "loss": 0.26869216561317444, + "step": 2956 + }, + { + "epoch": 1.4611392561472878, + "grad_norm": 1.4121232274028632, + "learning_rate": 3.7232135978893336e-06, + "loss": 0.28265517950057983, + "step": 2957 + }, + { + "epoch": 1.461633510441122, + "grad_norm": 1.268342410891318, + "learning_rate": 3.7168551120233965e-06, + "loss": 0.2381918877363205, + "step": 2958 + }, + { + "epoch": 1.4621277647349562, + "grad_norm": 1.3343795310746396, + "learning_rate": 3.710500820695203e-06, + "loss": 0.27194735407829285, + "step": 2959 + }, + { + "epoch": 1.4626220190287902, + "grad_norm": 1.419071318428777, + "learning_rate": 3.7041507281468e-06, + "loss": 0.2611599266529083, + "step": 2960 + }, + { + "epoch": 1.4631162733226244, + "grad_norm": 1.3417831313824735, + "learning_rate": 3.697804838617418e-06, + 
"loss": 0.2970972955226898, + "step": 2961 + }, + { + "epoch": 1.4636105276164586, + "grad_norm": 1.3986503652920064, + "learning_rate": 3.6914631563434743e-06, + "loss": 0.24313557147979736, + "step": 2962 + }, + { + "epoch": 1.4641047819102928, + "grad_norm": 1.21693161859368, + "learning_rate": 3.685125685558587e-06, + "loss": 0.23243792355060577, + "step": 2963 + }, + { + "epoch": 1.464599036204127, + "grad_norm": 1.384655578733909, + "learning_rate": 3.6787924304935696e-06, + "loss": 0.2850711941719055, + "step": 2964 + }, + { + "epoch": 1.4650932904979612, + "grad_norm": 1.2938153090671698, + "learning_rate": 3.6724633953764023e-06, + "loss": 0.26217392086982727, + "step": 2965 + }, + { + "epoch": 1.4655875447917954, + "grad_norm": 1.3004956100522334, + "learning_rate": 3.666138584432264e-06, + "loss": 0.24623268842697144, + "step": 2966 + }, + { + "epoch": 1.4660817990856296, + "grad_norm": 1.2765502382143128, + "learning_rate": 3.6598180018835063e-06, + "loss": 0.25010040402412415, + "step": 2967 + }, + { + "epoch": 1.4665760533794638, + "grad_norm": 1.2806642930208934, + "learning_rate": 3.6535016519496603e-06, + "loss": 0.24471378326416016, + "step": 2968 + }, + { + "epoch": 1.467070307673298, + "grad_norm": 1.4411992818002375, + "learning_rate": 3.6471895388474323e-06, + "loss": 0.2845621109008789, + "step": 2969 + }, + { + "epoch": 1.467564561967132, + "grad_norm": 1.394997312403621, + "learning_rate": 3.640881666790699e-06, + "loss": 0.26768919825553894, + "step": 2970 + }, + { + "epoch": 1.4680588162609662, + "grad_norm": 1.3707198305280583, + "learning_rate": 3.6345780399904983e-06, + "loss": 0.27386170625686646, + "step": 2971 + }, + { + "epoch": 1.4685530705548004, + "grad_norm": 1.2413908046529407, + "learning_rate": 3.628278662655055e-06, + "loss": 0.259655237197876, + "step": 2972 + }, + { + "epoch": 1.4690473248486347, + "grad_norm": 1.2328404027424946, + "learning_rate": 3.6219835389897305e-06, + "loss": 0.2234620749950409, + "step": 2973 + }, 
+ { + "epoch": 1.4695415791424689, + "grad_norm": 1.2170225214049992, + "learning_rate": 3.6156926731970664e-06, + "loss": 0.25133174657821655, + "step": 2974 + }, + { + "epoch": 1.4700358334363028, + "grad_norm": 1.4753631122763826, + "learning_rate": 3.609406069476752e-06, + "loss": 0.2856005132198334, + "step": 2975 + }, + { + "epoch": 1.470530087730137, + "grad_norm": 1.352763052735898, + "learning_rate": 3.603123732025635e-06, + "loss": 0.23760217428207397, + "step": 2976 + }, + { + "epoch": 1.4710243420239713, + "grad_norm": 1.315945468844056, + "learning_rate": 3.596845665037715e-06, + "loss": 0.2344968169927597, + "step": 2977 + }, + { + "epoch": 1.4715185963178055, + "grad_norm": 1.3513242562279373, + "learning_rate": 3.5905718727041415e-06, + "loss": 0.23936885595321655, + "step": 2978 + }, + { + "epoch": 1.4720128506116397, + "grad_norm": 1.2281537442777626, + "learning_rate": 3.584302359213204e-06, + "loss": 0.24542436003684998, + "step": 2979 + }, + { + "epoch": 1.4725071049054739, + "grad_norm": 1.2816242991916544, + "learning_rate": 3.578037128750338e-06, + "loss": 0.24754226207733154, + "step": 2980 + }, + { + "epoch": 1.473001359199308, + "grad_norm": 1.3406109779820896, + "learning_rate": 3.5717761854981335e-06, + "loss": 0.25167495012283325, + "step": 2981 + }, + { + "epoch": 1.4734956134931423, + "grad_norm": 1.2820406301810907, + "learning_rate": 3.565519533636296e-06, + "loss": 0.21352116763591766, + "step": 2982 + }, + { + "epoch": 1.4739898677869765, + "grad_norm": 1.5800404779419173, + "learning_rate": 3.5592671773416798e-06, + "loss": 0.24721838533878326, + "step": 2983 + }, + { + "epoch": 1.4744841220808107, + "grad_norm": 1.209332122723965, + "learning_rate": 3.5530191207882705e-06, + "loss": 0.2098400741815567, + "step": 2984 + }, + { + "epoch": 1.4749783763746447, + "grad_norm": 1.4059961620340085, + "learning_rate": 3.5467753681471784e-06, + "loss": 0.27138370275497437, + "step": 2985 + }, + { + "epoch": 1.475472630668479, + 
"grad_norm": 1.456553871591733, + "learning_rate": 3.5405359235866468e-06, + "loss": 0.2675255537033081, + "step": 2986 + }, + { + "epoch": 1.475966884962313, + "grad_norm": 1.3852192514849078, + "learning_rate": 3.5343007912720397e-06, + "loss": 0.2927984893321991, + "step": 2987 + }, + { + "epoch": 1.4764611392561473, + "grad_norm": 1.4840757807353469, + "learning_rate": 3.5280699753658354e-06, + "loss": 0.2897256910800934, + "step": 2988 + }, + { + "epoch": 1.4769553935499815, + "grad_norm": 1.3162511876956198, + "learning_rate": 3.521843480027646e-06, + "loss": 0.25903570652008057, + "step": 2989 + }, + { + "epoch": 1.4774496478438157, + "grad_norm": 1.1815962199969574, + "learning_rate": 3.515621309414191e-06, + "loss": 0.2097684144973755, + "step": 2990 + }, + { + "epoch": 1.4779439021376497, + "grad_norm": 1.368257943211956, + "learning_rate": 3.5094034676792952e-06, + "loss": 0.25807827711105347, + "step": 2991 + }, + { + "epoch": 1.478438156431484, + "grad_norm": 1.3326288392160186, + "learning_rate": 3.503189958973906e-06, + "loss": 0.24161803722381592, + "step": 2992 + }, + { + "epoch": 1.4789324107253181, + "grad_norm": 1.3735233821721475, + "learning_rate": 3.4969807874460717e-06, + "loss": 0.2612338364124298, + "step": 2993 + }, + { + "epoch": 1.4794266650191523, + "grad_norm": 1.3484776453875857, + "learning_rate": 3.490775957240947e-06, + "loss": 0.2529192566871643, + "step": 2994 + }, + { + "epoch": 1.4799209193129865, + "grad_norm": 1.376626480795096, + "learning_rate": 3.4845754725007883e-06, + "loss": 0.2616920471191406, + "step": 2995 + }, + { + "epoch": 1.4804151736068207, + "grad_norm": 1.1709509708234012, + "learning_rate": 3.4783793373649534e-06, + "loss": 0.2372770607471466, + "step": 2996 + }, + { + "epoch": 1.480909427900655, + "grad_norm": 1.6683733615888718, + "learning_rate": 3.4721875559698826e-06, + "loss": 0.2993369996547699, + "step": 2997 + }, + { + "epoch": 1.4814036821944891, + "grad_norm": 1.444631738912031, + "learning_rate": 
3.4660001324491354e-06, + "loss": 0.2703147530555725, + "step": 2998 + }, + { + "epoch": 1.4818979364883234, + "grad_norm": 1.497851135078702, + "learning_rate": 3.459817070933337e-06, + "loss": 0.2909662425518036, + "step": 2999 + }, + { + "epoch": 1.4823921907821573, + "grad_norm": 1.4957339087199897, + "learning_rate": 3.4536383755502146e-06, + "loss": 0.2620519697666168, + "step": 3000 + }, + { + "epoch": 1.4828864450759915, + "grad_norm": 1.4607702963487426, + "learning_rate": 3.447464050424576e-06, + "loss": 0.2740327715873718, + "step": 3001 + }, + { + "epoch": 1.4833806993698258, + "grad_norm": 1.4051737005514326, + "learning_rate": 3.441294099678314e-06, + "loss": 0.2597920000553131, + "step": 3002 + }, + { + "epoch": 1.48387495366366, + "grad_norm": 1.2931150222772085, + "learning_rate": 3.435128527430397e-06, + "loss": 0.23138844966888428, + "step": 3003 + }, + { + "epoch": 1.4843692079574942, + "grad_norm": 1.4678522965018421, + "learning_rate": 3.428967337796879e-06, + "loss": 0.26457998156547546, + "step": 3004 + }, + { + "epoch": 1.4848634622513284, + "grad_norm": 1.3435199008351797, + "learning_rate": 3.4228105348908703e-06, + "loss": 0.22283414006233215, + "step": 3005 + }, + { + "epoch": 1.4853577165451624, + "grad_norm": 1.404722725472706, + "learning_rate": 3.416658122822576e-06, + "loss": 0.26169392466545105, + "step": 3006 + }, + { + "epoch": 1.4858519708389966, + "grad_norm": 1.3942121909077798, + "learning_rate": 3.4105101056992574e-06, + "loss": 0.22738765180110931, + "step": 3007 + }, + { + "epoch": 1.4863462251328308, + "grad_norm": 1.640113120385147, + "learning_rate": 3.404366487625237e-06, + "loss": 0.24252702295780182, + "step": 3008 + }, + { + "epoch": 1.486840479426665, + "grad_norm": 1.2658350422978366, + "learning_rate": 3.398227272701905e-06, + "loss": 0.2192659229040146, + "step": 3009 + }, + { + "epoch": 1.4873347337204992, + "grad_norm": 1.3659525117305242, + "learning_rate": 3.3920924650277253e-06, + "loss": 
0.23824100196361542, + "step": 3010 + }, + { + "epoch": 1.4878289880143334, + "grad_norm": 1.304246601014088, + "learning_rate": 3.3859620686981977e-06, + "loss": 0.25558948516845703, + "step": 3011 + }, + { + "epoch": 1.4883232423081676, + "grad_norm": 1.2977660969069507, + "learning_rate": 3.3798360878058887e-06, + "loss": 0.23521414399147034, + "step": 3012 + }, + { + "epoch": 1.4888174966020018, + "grad_norm": 1.5059732923775448, + "learning_rate": 3.373714526440417e-06, + "loss": 0.26024043560028076, + "step": 3013 + }, + { + "epoch": 1.489311750895836, + "grad_norm": 1.3966534942487767, + "learning_rate": 3.3675973886884506e-06, + "loss": 0.2676945626735687, + "step": 3014 + }, + { + "epoch": 1.4898060051896702, + "grad_norm": 1.4302757106543351, + "learning_rate": 3.361484678633701e-06, + "loss": 0.29499778151512146, + "step": 3015 + }, + { + "epoch": 1.4903002594835042, + "grad_norm": 1.2541194356509255, + "learning_rate": 3.35537640035693e-06, + "loss": 0.21667227149009705, + "step": 3016 + }, + { + "epoch": 1.4907945137773384, + "grad_norm": 1.5055716214820787, + "learning_rate": 3.3492725579359288e-06, + "loss": 0.2852727770805359, + "step": 3017 + }, + { + "epoch": 1.4912887680711726, + "grad_norm": 1.3110566349547437, + "learning_rate": 3.343173155445546e-06, + "loss": 0.22535362839698792, + "step": 3018 + }, + { + "epoch": 1.4917830223650068, + "grad_norm": 1.3390943365322368, + "learning_rate": 3.3370781969576473e-06, + "loss": 0.23513402044773102, + "step": 3019 + }, + { + "epoch": 1.492277276658841, + "grad_norm": 1.34171251218287, + "learning_rate": 3.3309876865411426e-06, + "loss": 0.2343328893184662, + "step": 3020 + }, + { + "epoch": 1.4927715309526752, + "grad_norm": 1.4982279835949508, + "learning_rate": 3.3249016282619696e-06, + "loss": 0.309964656829834, + "step": 3021 + }, + { + "epoch": 1.4932657852465092, + "grad_norm": 1.4104830526650916, + "learning_rate": 3.318820026183095e-06, + "loss": 0.2678214907646179, + "step": 3022 + }, + { + 
"epoch": 1.4937600395403434, + "grad_norm": 1.3871314289257326, + "learning_rate": 3.312742884364508e-06, + "loss": 0.24117907881736755, + "step": 3023 + }, + { + "epoch": 1.4942542938341776, + "grad_norm": 1.4966526123322192, + "learning_rate": 3.306670206863225e-06, + "loss": 0.23572009801864624, + "step": 3024 + }, + { + "epoch": 1.4947485481280118, + "grad_norm": 1.1974970903692888, + "learning_rate": 3.3006019977332728e-06, + "loss": 0.20058652758598328, + "step": 3025 + }, + { + "epoch": 1.495242802421846, + "grad_norm": 1.4552709446661256, + "learning_rate": 3.2945382610257017e-06, + "loss": 0.2433123141527176, + "step": 3026 + }, + { + "epoch": 1.4957370567156802, + "grad_norm": 1.330592869585441, + "learning_rate": 3.2884790007885834e-06, + "loss": 0.2648032009601593, + "step": 3027 + }, + { + "epoch": 1.4962313110095145, + "grad_norm": 1.4274009022113794, + "learning_rate": 3.2824242210669853e-06, + "loss": 0.23508986830711365, + "step": 3028 + }, + { + "epoch": 1.4967255653033487, + "grad_norm": 1.337116326245031, + "learning_rate": 3.2763739259029946e-06, + "loss": 0.2340327799320221, + "step": 3029 + }, + { + "epoch": 1.4972198195971829, + "grad_norm": 1.4724312525996526, + "learning_rate": 3.2703281193357028e-06, + "loss": 0.24071671068668365, + "step": 3030 + }, + { + "epoch": 1.4977140738910169, + "grad_norm": 1.4191732736253682, + "learning_rate": 3.264286805401203e-06, + "loss": 0.26332271099090576, + "step": 3031 + }, + { + "epoch": 1.498208328184851, + "grad_norm": 1.266600605298302, + "learning_rate": 3.2582499881325904e-06, + "loss": 0.21818014979362488, + "step": 3032 + }, + { + "epoch": 1.4987025824786853, + "grad_norm": 1.3340246980776698, + "learning_rate": 3.2522176715599606e-06, + "loss": 0.26997917890548706, + "step": 3033 + }, + { + "epoch": 1.4991968367725195, + "grad_norm": 1.4818331950802985, + "learning_rate": 3.2461898597103935e-06, + "loss": 0.21703608334064484, + "step": 3034 + }, + { + "epoch": 1.4996910910663537, + 
"grad_norm": 1.287764216628678, + "learning_rate": 3.240166556607979e-06, + "loss": 0.24345526099205017, + "step": 3035 + }, + { + "epoch": 1.5001853453601877, + "grad_norm": 1.2134455175661707, + "learning_rate": 3.2341477662737877e-06, + "loss": 0.2428402602672577, + "step": 3036 + }, + { + "epoch": 1.5006795996540219, + "grad_norm": 1.389226279044202, + "learning_rate": 3.228133492725872e-06, + "loss": 0.234619602560997, + "step": 3037 + }, + { + "epoch": 1.501173853947856, + "grad_norm": 1.3308420188359134, + "learning_rate": 3.2221237399792784e-06, + "loss": 0.27995944023132324, + "step": 3038 + }, + { + "epoch": 1.5016681082416903, + "grad_norm": 1.283844133259085, + "learning_rate": 3.2161185120460327e-06, + "loss": 0.23708665370941162, + "step": 3039 + }, + { + "epoch": 1.5021623625355245, + "grad_norm": 1.3268773172813266, + "learning_rate": 3.2101178129351373e-06, + "loss": 0.2541486620903015, + "step": 3040 + }, + { + "epoch": 1.5026566168293587, + "grad_norm": 1.2735534589560005, + "learning_rate": 3.204121646652576e-06, + "loss": 0.2281494140625, + "step": 3041 + }, + { + "epoch": 1.503150871123193, + "grad_norm": 1.4214183804465141, + "learning_rate": 3.1981300172013006e-06, + "loss": 0.24793995916843414, + "step": 3042 + }, + { + "epoch": 1.503645125417027, + "grad_norm": 1.3820844339773122, + "learning_rate": 3.19214292858124e-06, + "loss": 0.25877612829208374, + "step": 3043 + }, + { + "epoch": 1.5041393797108613, + "grad_norm": 1.2606638362034603, + "learning_rate": 3.1861603847892907e-06, + "loss": 0.23822908103466034, + "step": 3044 + }, + { + "epoch": 1.5046336340046955, + "grad_norm": 1.3375723790086107, + "learning_rate": 3.1801823898193075e-06, + "loss": 0.2450297623872757, + "step": 3045 + }, + { + "epoch": 1.5051278882985297, + "grad_norm": 1.291286771303469, + "learning_rate": 3.1742089476621176e-06, + "loss": 0.23657044768333435, + "step": 3046 + }, + { + "epoch": 1.505622142592364, + "grad_norm": 1.330327819651038, + "learning_rate": 
3.1682400623055043e-06, + "loss": 0.22040539979934692, + "step": 3047 + }, + { + "epoch": 1.506116396886198, + "grad_norm": 1.2295078748580162, + "learning_rate": 3.162275737734213e-06, + "loss": 0.24671347439289093, + "step": 3048 + }, + { + "epoch": 1.5066106511800321, + "grad_norm": 1.3193055288047242, + "learning_rate": 3.156315977929939e-06, + "loss": 0.2590971291065216, + "step": 3049 + }, + { + "epoch": 1.5071049054738663, + "grad_norm": 1.3201796395435559, + "learning_rate": 3.1503607868713383e-06, + "loss": 0.2650923430919647, + "step": 3050 + }, + { + "epoch": 1.5075991597677005, + "grad_norm": 1.3124240495866886, + "learning_rate": 3.1444101685339987e-06, + "loss": 0.22146420180797577, + "step": 3051 + }, + { + "epoch": 1.5080934140615345, + "grad_norm": 1.3875424644692997, + "learning_rate": 3.1384641268904804e-06, + "loss": 0.26743125915527344, + "step": 3052 + }, + { + "epoch": 1.5085876683553687, + "grad_norm": 1.4406215302595167, + "learning_rate": 3.1325226659102746e-06, + "loss": 0.24730908870697021, + "step": 3053 + }, + { + "epoch": 1.509081922649203, + "grad_norm": 1.3933207280707873, + "learning_rate": 3.1265857895598094e-06, + "loss": 0.26301079988479614, + "step": 3054 + }, + { + "epoch": 1.5095761769430371, + "grad_norm": 1.2589035946994764, + "learning_rate": 3.1206535018024598e-06, + "loss": 0.22815877199172974, + "step": 3055 + }, + { + "epoch": 1.5100704312368713, + "grad_norm": 1.533757049437193, + "learning_rate": 3.114725806598544e-06, + "loss": 0.25178754329681396, + "step": 3056 + }, + { + "epoch": 1.5105646855307056, + "grad_norm": 1.3661154596053653, + "learning_rate": 3.1088027079052973e-06, + "loss": 0.20269548892974854, + "step": 3057 + }, + { + "epoch": 1.5110589398245398, + "grad_norm": 1.4014331356202114, + "learning_rate": 3.1028842096769006e-06, + "loss": 0.25972461700439453, + "step": 3058 + }, + { + "epoch": 1.511553194118374, + "grad_norm": 1.3745096869790834, + "learning_rate": 3.0969703158644583e-06, + "loss": 
0.23313641548156738, + "step": 3059 + }, + { + "epoch": 1.5120474484122082, + "grad_norm": 1.2941298023610517, + "learning_rate": 3.0910610304159993e-06, + "loss": 0.2359476238489151, + "step": 3060 + }, + { + "epoch": 1.5125417027060424, + "grad_norm": 1.3631605592123968, + "learning_rate": 3.085156357276481e-06, + "loss": 0.263039767742157, + "step": 3061 + }, + { + "epoch": 1.5130359569998766, + "grad_norm": 1.4414947958352682, + "learning_rate": 3.0792563003877795e-06, + "loss": 0.2222701609134674, + "step": 3062 + }, + { + "epoch": 1.5135302112937106, + "grad_norm": 1.5152386602086467, + "learning_rate": 3.0733608636886815e-06, + "loss": 0.2511240839958191, + "step": 3063 + }, + { + "epoch": 1.5140244655875448, + "grad_norm": 1.3426863589238012, + "learning_rate": 3.0674700511149057e-06, + "loss": 0.26376873254776, + "step": 3064 + }, + { + "epoch": 1.514518719881379, + "grad_norm": 1.50705834278763, + "learning_rate": 3.0615838665990685e-06, + "loss": 0.2883176803588867, + "step": 3065 + }, + { + "epoch": 1.5150129741752132, + "grad_norm": 1.4534493774446482, + "learning_rate": 3.055702314070703e-06, + "loss": 0.2641439437866211, + "step": 3066 + }, + { + "epoch": 1.5155072284690472, + "grad_norm": 1.2206107550113217, + "learning_rate": 3.049825397456252e-06, + "loss": 0.22250229120254517, + "step": 3067 + }, + { + "epoch": 1.5160014827628814, + "grad_norm": 1.6917159383624243, + "learning_rate": 3.0439531206790585e-06, + "loss": 0.291684091091156, + "step": 3068 + }, + { + "epoch": 1.5164957370567156, + "grad_norm": 1.2582948861406589, + "learning_rate": 3.0380854876593725e-06, + "loss": 0.22581104934215546, + "step": 3069 + }, + { + "epoch": 1.5169899913505498, + "grad_norm": 1.3218689478609282, + "learning_rate": 3.032222502314345e-06, + "loss": 0.22701920568943024, + "step": 3070 + }, + { + "epoch": 1.517484245644384, + "grad_norm": 1.4011754473371674, + "learning_rate": 3.0263641685580134e-06, + "loss": 0.27151840925216675, + "step": 3071 + }, + { + 
"epoch": 1.5179784999382182, + "grad_norm": 1.4319870241234463, + "learning_rate": 3.0205104903013183e-06, + "loss": 0.25780510902404785, + "step": 3072 + }, + { + "epoch": 1.5184727542320524, + "grad_norm": 1.232949136662072, + "learning_rate": 3.014661471452103e-06, + "loss": 0.23905009031295776, + "step": 3073 + }, + { + "epoch": 1.5189670085258866, + "grad_norm": 1.296685135563547, + "learning_rate": 3.0088171159150758e-06, + "loss": 0.25984710454940796, + "step": 3074 + }, + { + "epoch": 1.5194612628197208, + "grad_norm": 1.5925440917505933, + "learning_rate": 3.0029774275918523e-06, + "loss": 0.24934321641921997, + "step": 3075 + }, + { + "epoch": 1.519955517113555, + "grad_norm": 1.3570253725800296, + "learning_rate": 2.997142410380921e-06, + "loss": 0.24181538820266724, + "step": 3076 + }, + { + "epoch": 1.5204497714073892, + "grad_norm": 1.4224922399256614, + "learning_rate": 2.9913120681776586e-06, + "loss": 0.28867265582084656, + "step": 3077 + }, + { + "epoch": 1.5209440257012234, + "grad_norm": 1.3689537883355085, + "learning_rate": 2.9854864048743183e-06, + "loss": 0.25082239508628845, + "step": 3078 + }, + { + "epoch": 1.5214382799950574, + "grad_norm": 1.1809552467181543, + "learning_rate": 2.979665424360031e-06, + "loss": 0.21152186393737793, + "step": 3079 + }, + { + "epoch": 1.5219325342888916, + "grad_norm": 1.3255328033562375, + "learning_rate": 2.9738491305207926e-06, + "loss": 0.22989922761917114, + "step": 3080 + }, + { + "epoch": 1.5224267885827258, + "grad_norm": 1.4352789035320561, + "learning_rate": 2.9680375272394855e-06, + "loss": 0.21606113016605377, + "step": 3081 + }, + { + "epoch": 1.5229210428765598, + "grad_norm": 1.2795767684328416, + "learning_rate": 2.962230618395855e-06, + "loss": 0.25060969591140747, + "step": 3082 + }, + { + "epoch": 1.523415297170394, + "grad_norm": 1.4409246111783223, + "learning_rate": 2.9564284078665016e-06, + "loss": 0.2574993371963501, + "step": 3083 + }, + { + "epoch": 1.5239095514642282, + 
"grad_norm": 1.3476850353049301, + "learning_rate": 2.9506308995249035e-06, + "loss": 0.2552590072154999, + "step": 3084 + }, + { + "epoch": 1.5244038057580624, + "grad_norm": 1.4294064187721107, + "learning_rate": 2.9448380972413936e-06, + "loss": 0.2356393188238144, + "step": 3085 + }, + { + "epoch": 1.5248980600518967, + "grad_norm": 1.2956637091449177, + "learning_rate": 2.939050004883164e-06, + "loss": 0.25111299753189087, + "step": 3086 + }, + { + "epoch": 1.5253923143457309, + "grad_norm": 1.6187968050107684, + "learning_rate": 2.933266626314263e-06, + "loss": 0.2713226079940796, + "step": 3087 + }, + { + "epoch": 1.525886568639565, + "grad_norm": 1.371480760416421, + "learning_rate": 2.92748796539559e-06, + "loss": 0.2493591606616974, + "step": 3088 + }, + { + "epoch": 1.5263808229333993, + "grad_norm": 1.3919253891743593, + "learning_rate": 2.9217140259848984e-06, + "loss": 0.2377934455871582, + "step": 3089 + }, + { + "epoch": 1.5268750772272335, + "grad_norm": 1.222188939870737, + "learning_rate": 2.9159448119367896e-06, + "loss": 0.23113523423671722, + "step": 3090 + }, + { + "epoch": 1.5273693315210677, + "grad_norm": 1.3071786210451368, + "learning_rate": 2.910180327102702e-06, + "loss": 0.2212657630443573, + "step": 3091 + }, + { + "epoch": 1.527863585814902, + "grad_norm": 1.4809706556535216, + "learning_rate": 2.904420575330923e-06, + "loss": 0.3317147195339203, + "step": 3092 + }, + { + "epoch": 1.528357840108736, + "grad_norm": 1.222501836116789, + "learning_rate": 2.8986655604665914e-06, + "loss": 0.21677865087985992, + "step": 3093 + }, + { + "epoch": 1.52885209440257, + "grad_norm": 1.4687657258901345, + "learning_rate": 2.892915286351663e-06, + "loss": 0.2719038724899292, + "step": 3094 + }, + { + "epoch": 1.5293463486964043, + "grad_norm": 1.4800981330468082, + "learning_rate": 2.887169756824941e-06, + "loss": 0.2870655953884125, + "step": 3095 + }, + { + "epoch": 1.5298406029902385, + "grad_norm": 1.6050530390151894, + "learning_rate": 
2.8814289757220636e-06, + "loss": 0.27370864152908325, + "step": 3096 + }, + { + "epoch": 1.5303348572840727, + "grad_norm": 1.2925821727625635, + "learning_rate": 2.8756929468754834e-06, + "loss": 0.24579623341560364, + "step": 3097 + }, + { + "epoch": 1.5308291115779067, + "grad_norm": 1.5466324939604184, + "learning_rate": 2.869961674114501e-06, + "loss": 0.25092196464538574, + "step": 3098 + }, + { + "epoch": 1.531323365871741, + "grad_norm": 1.539826368870157, + "learning_rate": 2.864235161265232e-06, + "loss": 0.29637211561203003, + "step": 3099 + }, + { + "epoch": 1.531817620165575, + "grad_norm": 1.346232107313421, + "learning_rate": 2.8585134121506086e-06, + "loss": 0.24216854572296143, + "step": 3100 + }, + { + "epoch": 1.5323118744594093, + "grad_norm": 1.264644352464564, + "learning_rate": 2.8527964305903887e-06, + "loss": 0.2050018608570099, + "step": 3101 + }, + { + "epoch": 1.5328061287532435, + "grad_norm": 1.4429594327267479, + "learning_rate": 2.8470842204011562e-06, + "loss": 0.2323600798845291, + "step": 3102 + }, + { + "epoch": 1.5333003830470777, + "grad_norm": 1.3588986581117766, + "learning_rate": 2.8413767853962937e-06, + "loss": 0.2582741379737854, + "step": 3103 + }, + { + "epoch": 1.533794637340912, + "grad_norm": 1.2503142010331656, + "learning_rate": 2.8356741293860034e-06, + "loss": 0.2190069705247879, + "step": 3104 + }, + { + "epoch": 1.5342888916347461, + "grad_norm": 1.2700906528895424, + "learning_rate": 2.8299762561773004e-06, + "loss": 0.2293972671031952, + "step": 3105 + }, + { + "epoch": 1.5347831459285803, + "grad_norm": 1.4604730845156306, + "learning_rate": 2.8242831695740004e-06, + "loss": 0.28793102502822876, + "step": 3106 + }, + { + "epoch": 1.5352774002224145, + "grad_norm": 1.3871033704581968, + "learning_rate": 2.8185948733767276e-06, + "loss": 0.25700464844703674, + "step": 3107 + }, + { + "epoch": 1.5357716545162488, + "grad_norm": 1.6036334059609652, + "learning_rate": 2.8129113713829115e-06, + "loss": 
0.2633448541164398, + "step": 3108 + }, + { + "epoch": 1.5362659088100827, + "grad_norm": 1.2623866770143863, + "learning_rate": 2.8072326673867667e-06, + "loss": 0.2363145351409912, + "step": 3109 + }, + { + "epoch": 1.536760163103917, + "grad_norm": 1.3073287831639788, + "learning_rate": 2.8015587651793273e-06, + "loss": 0.24324053525924683, + "step": 3110 + }, + { + "epoch": 1.5372544173977511, + "grad_norm": 1.445888976457047, + "learning_rate": 2.795889668548399e-06, + "loss": 0.24139198660850525, + "step": 3111 + }, + { + "epoch": 1.5377486716915854, + "grad_norm": 1.3070463104686283, + "learning_rate": 2.790225381278595e-06, + "loss": 0.2502334713935852, + "step": 3112 + }, + { + "epoch": 1.5382429259854193, + "grad_norm": 1.3233606598015195, + "learning_rate": 2.784565907151311e-06, + "loss": 0.24635109305381775, + "step": 3113 + }, + { + "epoch": 1.5387371802792535, + "grad_norm": 1.236974627125298, + "learning_rate": 2.7789112499447312e-06, + "loss": 0.2299586534500122, + "step": 3114 + }, + { + "epoch": 1.5392314345730878, + "grad_norm": 1.232633224868461, + "learning_rate": 2.7732614134338243e-06, + "loss": 0.2296627312898636, + "step": 3115 + }, + { + "epoch": 1.539725688866922, + "grad_norm": 1.3919487561893158, + "learning_rate": 2.767616401390343e-06, + "loss": 0.26127320528030396, + "step": 3116 + }, + { + "epoch": 1.5402199431607562, + "grad_norm": 1.3612758454379796, + "learning_rate": 2.761976217582808e-06, + "loss": 0.24718445539474487, + "step": 3117 + }, + { + "epoch": 1.5407141974545904, + "grad_norm": 1.3000063965271036, + "learning_rate": 2.7563408657765345e-06, + "loss": 0.22314362227916718, + "step": 3118 + }, + { + "epoch": 1.5412084517484246, + "grad_norm": 1.2190954536725822, + "learning_rate": 2.750710349733602e-06, + "loss": 0.2288416028022766, + "step": 3119 + }, + { + "epoch": 1.5417027060422588, + "grad_norm": 1.3774388084670495, + "learning_rate": 2.7450846732128577e-06, + "loss": 0.26181158423423767, + "step": 3120 + }, + { + 
"epoch": 1.542196960336093, + "grad_norm": 1.2123920647911897, + "learning_rate": 2.739463839969926e-06, + "loss": 0.22397834062576294, + "step": 3121 + }, + { + "epoch": 1.5426912146299272, + "grad_norm": 1.4361842348504215, + "learning_rate": 2.7338478537571943e-06, + "loss": 0.23633858561515808, + "step": 3122 + }, + { + "epoch": 1.5431854689237614, + "grad_norm": 1.402092217147563, + "learning_rate": 2.7282367183238143e-06, + "loss": 0.26719149947166443, + "step": 3123 + }, + { + "epoch": 1.5436797232175956, + "grad_norm": 1.5260713360749147, + "learning_rate": 2.722630437415701e-06, + "loss": 0.2882165014743805, + "step": 3124 + }, + { + "epoch": 1.5441739775114296, + "grad_norm": 1.258294682394544, + "learning_rate": 2.7170290147755285e-06, + "loss": 0.2377905696630478, + "step": 3125 + }, + { + "epoch": 1.5446682318052638, + "grad_norm": 1.3195147017546947, + "learning_rate": 2.7114324541427193e-06, + "loss": 0.2705368399620056, + "step": 3126 + }, + { + "epoch": 1.545162486099098, + "grad_norm": 1.2857701503132921, + "learning_rate": 2.7058407592534663e-06, + "loss": 0.246593177318573, + "step": 3127 + }, + { + "epoch": 1.5456567403929322, + "grad_norm": 1.33265619524068, + "learning_rate": 2.700253933840705e-06, + "loss": 0.2339816391468048, + "step": 3128 + }, + { + "epoch": 1.5461509946867662, + "grad_norm": 1.3254997645322988, + "learning_rate": 2.6946719816341127e-06, + "loss": 0.2727898359298706, + "step": 3129 + }, + { + "epoch": 1.5466452489806004, + "grad_norm": 1.483440007746236, + "learning_rate": 2.6890949063601255e-06, + "loss": 0.285343736410141, + "step": 3130 + }, + { + "epoch": 1.5471395032744346, + "grad_norm": 1.4219498161281177, + "learning_rate": 2.6835227117419184e-06, + "loss": 0.25782397389411926, + "step": 3131 + }, + { + "epoch": 1.5476337575682688, + "grad_norm": 1.4096561970820742, + "learning_rate": 2.67795540149941e-06, + "loss": 0.26677054166793823, + "step": 3132 + }, + { + "epoch": 1.548128011862103, + "grad_norm": 
1.375758748898483, + "learning_rate": 2.6723929793492555e-06, + "loss": 0.2696993052959442, + "step": 3133 + }, + { + "epoch": 1.5486222661559372, + "grad_norm": 1.3214248540646165, + "learning_rate": 2.66683544900485e-06, + "loss": 0.2536013424396515, + "step": 3134 + }, + { + "epoch": 1.5491165204497714, + "grad_norm": 1.352660590997614, + "learning_rate": 2.661282814176319e-06, + "loss": 0.2583885192871094, + "step": 3135 + }, + { + "epoch": 1.5496107747436056, + "grad_norm": 1.3555750519784333, + "learning_rate": 2.655735078570528e-06, + "loss": 0.24341340363025665, + "step": 3136 + }, + { + "epoch": 1.5501050290374399, + "grad_norm": 1.3694743065317843, + "learning_rate": 2.650192245891059e-06, + "loss": 0.2575637698173523, + "step": 3137 + }, + { + "epoch": 1.550599283331274, + "grad_norm": 1.3743479794773286, + "learning_rate": 2.644654319838227e-06, + "loss": 0.24109753966331482, + "step": 3138 + }, + { + "epoch": 1.5510935376251083, + "grad_norm": 1.2822421062589742, + "learning_rate": 2.6391213041090822e-06, + "loss": 0.246525377035141, + "step": 3139 + }, + { + "epoch": 1.5515877919189422, + "grad_norm": 1.3144657839500415, + "learning_rate": 2.6335932023973777e-06, + "loss": 0.2589566111564636, + "step": 3140 + }, + { + "epoch": 1.5520820462127765, + "grad_norm": 1.333811387247849, + "learning_rate": 2.628070018393598e-06, + "loss": 0.26198744773864746, + "step": 3141 + }, + { + "epoch": 1.5525763005066107, + "grad_norm": 1.2808916237604833, + "learning_rate": 2.622551755784942e-06, + "loss": 0.22991782426834106, + "step": 3142 + }, + { + "epoch": 1.5530705548004449, + "grad_norm": 1.242582313641482, + "learning_rate": 2.6170384182553244e-06, + "loss": 0.22211629152297974, + "step": 3143 + }, + { + "epoch": 1.5535648090942789, + "grad_norm": 1.306994517774283, + "learning_rate": 2.6115300094853666e-06, + "loss": 0.2665289640426636, + "step": 3144 + }, + { + "epoch": 1.554059063388113, + "grad_norm": 1.260713008188702, + "learning_rate": 
2.6060265331524114e-06, + "loss": 0.20211085677146912, + "step": 3145 + }, + { + "epoch": 1.5545533176819473, + "grad_norm": 1.3930467289400041, + "learning_rate": 2.6005279929304918e-06, + "loss": 0.24264919757843018, + "step": 3146 + }, + { + "epoch": 1.5550475719757815, + "grad_norm": 1.316241217623005, + "learning_rate": 2.595034392490354e-06, + "loss": 0.2722601294517517, + "step": 3147 + }, + { + "epoch": 1.5555418262696157, + "grad_norm": 1.3463437829147908, + "learning_rate": 2.58954573549946e-06, + "loss": 0.26061201095581055, + "step": 3148 + }, + { + "epoch": 1.5560360805634499, + "grad_norm": 1.3701131034296847, + "learning_rate": 2.5840620256219464e-06, + "loss": 0.20620305836200714, + "step": 3149 + }, + { + "epoch": 1.556530334857284, + "grad_norm": 1.3323948648350379, + "learning_rate": 2.578583266518664e-06, + "loss": 0.2424723207950592, + "step": 3150 + }, + { + "epoch": 1.5570245891511183, + "grad_norm": 1.4286998078779003, + "learning_rate": 2.573109461847153e-06, + "loss": 0.248019739985466, + "step": 3151 + }, + { + "epoch": 1.5575188434449525, + "grad_norm": 1.2753051030343154, + "learning_rate": 2.5676406152616483e-06, + "loss": 0.23162522912025452, + "step": 3152 + }, + { + "epoch": 1.5580130977387867, + "grad_norm": 1.6072180292151754, + "learning_rate": 2.562176730413074e-06, + "loss": 0.20099176466464996, + "step": 3153 + }, + { + "epoch": 1.558507352032621, + "grad_norm": 1.4868098360756863, + "learning_rate": 2.5567178109490433e-06, + "loss": 0.27957430481910706, + "step": 3154 + }, + { + "epoch": 1.5590016063264551, + "grad_norm": 1.248830156095604, + "learning_rate": 2.551263860513845e-06, + "loss": 0.23941464722156525, + "step": 3155 + }, + { + "epoch": 1.559495860620289, + "grad_norm": 1.4371594834198067, + "learning_rate": 2.5458148827484695e-06, + "loss": 0.24910275638103485, + "step": 3156 + }, + { + "epoch": 1.5599901149141233, + "grad_norm": 1.325153365111165, + "learning_rate": 2.540370881290568e-06, + "loss": 
0.26430344581604004, + "step": 3157 + }, + { + "epoch": 1.5604843692079575, + "grad_norm": 1.419775898075986, + "learning_rate": 2.534931859774481e-06, + "loss": 0.2833614945411682, + "step": 3158 + }, + { + "epoch": 1.5609786235017915, + "grad_norm": 1.2863995969426358, + "learning_rate": 2.5294978218312215e-06, + "loss": 0.24630708992481232, + "step": 3159 + }, + { + "epoch": 1.5614728777956257, + "grad_norm": 1.398973984381973, + "learning_rate": 2.524068771088476e-06, + "loss": 0.2674857974052429, + "step": 3160 + }, + { + "epoch": 1.56196713208946, + "grad_norm": 1.34356245737179, + "learning_rate": 2.5186447111706005e-06, + "loss": 0.23531441390514374, + "step": 3161 + }, + { + "epoch": 1.5624613863832941, + "grad_norm": 1.2374731185400574, + "learning_rate": 2.5132256456986236e-06, + "loss": 0.2603223919868469, + "step": 3162 + }, + { + "epoch": 1.5629556406771283, + "grad_norm": 1.302457785178724, + "learning_rate": 2.5078115782902267e-06, + "loss": 0.220007985830307, + "step": 3163 + }, + { + "epoch": 1.5634498949709625, + "grad_norm": 1.36046018530454, + "learning_rate": 2.502402512559773e-06, + "loss": 0.22660651803016663, + "step": 3164 + }, + { + "epoch": 1.5639441492647967, + "grad_norm": 1.4627286861974862, + "learning_rate": 2.4969984521182766e-06, + "loss": 0.26425695419311523, + "step": 3165 + }, + { + "epoch": 1.564438403558631, + "grad_norm": 1.3019070428865334, + "learning_rate": 2.4915994005734057e-06, + "loss": 0.22870787978172302, + "step": 3166 + }, + { + "epoch": 1.5649326578524652, + "grad_norm": 1.2622414815912377, + "learning_rate": 2.48620536152949e-06, + "loss": 0.25734084844589233, + "step": 3167 + }, + { + "epoch": 1.5654269121462994, + "grad_norm": 1.2954820564672134, + "learning_rate": 2.4808163385875226e-06, + "loss": 0.24831843376159668, + "step": 3168 + }, + { + "epoch": 1.5659211664401336, + "grad_norm": 1.3356720372460569, + "learning_rate": 2.4754323353451284e-06, + "loss": 0.2389685958623886, + "step": 3169 + }, + { + 
"epoch": 1.5664154207339678, + "grad_norm": 1.33182477221405, + "learning_rate": 2.4700533553965946e-06, + "loss": 0.24750663340091705, + "step": 3170 + }, + { + "epoch": 1.5669096750278018, + "grad_norm": 1.4158946259185428, + "learning_rate": 2.4646794023328525e-06, + "loss": 0.2689003348350525, + "step": 3171 + }, + { + "epoch": 1.567403929321636, + "grad_norm": 1.32371836304635, + "learning_rate": 2.45931047974147e-06, + "loss": 0.2574145197868347, + "step": 3172 + }, + { + "epoch": 1.5678981836154702, + "grad_norm": 1.5403400973166155, + "learning_rate": 2.4539465912066706e-06, + "loss": 0.2586211562156677, + "step": 3173 + }, + { + "epoch": 1.5683924379093044, + "grad_norm": 1.340393455505496, + "learning_rate": 2.4485877403093095e-06, + "loss": 0.26383671164512634, + "step": 3174 + }, + { + "epoch": 1.5688866922031384, + "grad_norm": 1.2806590186816509, + "learning_rate": 2.4432339306268736e-06, + "loss": 0.28196123242378235, + "step": 3175 + }, + { + "epoch": 1.5693809464969726, + "grad_norm": 1.4692337066995136, + "learning_rate": 2.4378851657334923e-06, + "loss": 0.2736835181713104, + "step": 3176 + }, + { + "epoch": 1.5698752007908068, + "grad_norm": 1.3442483287569258, + "learning_rate": 2.4325414491999255e-06, + "loss": 0.2316201627254486, + "step": 3177 + }, + { + "epoch": 1.570369455084641, + "grad_norm": 1.363437265904272, + "learning_rate": 2.427202784593562e-06, + "loss": 0.23955810070037842, + "step": 3178 + }, + { + "epoch": 1.5708637093784752, + "grad_norm": 1.4240865879172782, + "learning_rate": 2.4218691754784162e-06, + "loss": 0.263042151927948, + "step": 3179 + }, + { + "epoch": 1.5713579636723094, + "grad_norm": 1.3283544396978941, + "learning_rate": 2.4165406254151312e-06, + "loss": 0.25570976734161377, + "step": 3180 + }, + { + "epoch": 1.5718522179661436, + "grad_norm": 1.3508561425487733, + "learning_rate": 2.4112171379609696e-06, + "loss": 0.2503488063812256, + "step": 3181 + }, + { + "epoch": 1.5723464722599778, + "grad_norm": 
1.2731349274514334, + "learning_rate": 2.40589871666982e-06, + "loss": 0.21815824508666992, + "step": 3182 + }, + { + "epoch": 1.572840726553812, + "grad_norm": 1.4354076907799536, + "learning_rate": 2.400585365092177e-06, + "loss": 0.23936739563941956, + "step": 3183 + }, + { + "epoch": 1.5733349808476462, + "grad_norm": 1.2459112031686363, + "learning_rate": 2.3952770867751595e-06, + "loss": 0.2618086636066437, + "step": 3184 + }, + { + "epoch": 1.5738292351414804, + "grad_norm": 1.477109441631464, + "learning_rate": 2.3899738852625065e-06, + "loss": 0.2852020263671875, + "step": 3185 + }, + { + "epoch": 1.5743234894353144, + "grad_norm": 1.4364121007652697, + "learning_rate": 2.3846757640945505e-06, + "loss": 0.28860047459602356, + "step": 3186 + }, + { + "epoch": 1.5748177437291486, + "grad_norm": 1.2738328733534112, + "learning_rate": 2.3793827268082446e-06, + "loss": 0.2397383451461792, + "step": 3187 + }, + { + "epoch": 1.5753119980229828, + "grad_norm": 1.3548543446694599, + "learning_rate": 2.374094776937145e-06, + "loss": 0.25204962491989136, + "step": 3188 + }, + { + "epoch": 1.575806252316817, + "grad_norm": 1.2908932541507008, + "learning_rate": 2.368811918011411e-06, + "loss": 0.21216189861297607, + "step": 3189 + }, + { + "epoch": 1.576300506610651, + "grad_norm": 1.4719289728075926, + "learning_rate": 2.363534153557805e-06, + "loss": 0.2647620737552643, + "step": 3190 + }, + { + "epoch": 1.5767947609044852, + "grad_norm": 1.4154428976481128, + "learning_rate": 2.358261487099688e-06, + "loss": 0.3079666793346405, + "step": 3191 + }, + { + "epoch": 1.5772890151983194, + "grad_norm": 1.1847060614906242, + "learning_rate": 2.352993922157013e-06, + "loss": 0.22961711883544922, + "step": 3192 + }, + { + "epoch": 1.5777832694921536, + "grad_norm": 1.5460794294977342, + "learning_rate": 2.347731462246331e-06, + "loss": 0.2657305598258972, + "step": 3193 + }, + { + "epoch": 1.5782775237859878, + "grad_norm": 1.2622234684788671, + "learning_rate": 
2.3424741108807914e-06, + "loss": 0.224237859249115, + "step": 3194 + }, + { + "epoch": 1.578771778079822, + "grad_norm": 1.4036688905605132, + "learning_rate": 2.337221871570121e-06, + "loss": 0.26459985971450806, + "step": 3195 + }, + { + "epoch": 1.5792660323736563, + "grad_norm": 1.4237290486306964, + "learning_rate": 2.331974747820641e-06, + "loss": 0.25391027331352234, + "step": 3196 + }, + { + "epoch": 1.5797602866674905, + "grad_norm": 1.3683418214908574, + "learning_rate": 2.326732743135256e-06, + "loss": 0.25822141766548157, + "step": 3197 + }, + { + "epoch": 1.5802545409613247, + "grad_norm": 1.3569651988075904, + "learning_rate": 2.3214958610134554e-06, + "loss": 0.25140073895454407, + "step": 3198 + }, + { + "epoch": 1.5807487952551589, + "grad_norm": 1.280802230226295, + "learning_rate": 2.3162641049513035e-06, + "loss": 0.2550397515296936, + "step": 3199 + }, + { + "epoch": 1.581243049548993, + "grad_norm": 1.3770416210337255, + "learning_rate": 2.3110374784414526e-06, + "loss": 0.2648996412754059, + "step": 3200 + }, + { + "epoch": 1.5817373038428273, + "grad_norm": 1.285627272529884, + "learning_rate": 2.3058159849731134e-06, + "loss": 0.235626682639122, + "step": 3201 + }, + { + "epoch": 1.5822315581366613, + "grad_norm": 1.354562155318599, + "learning_rate": 2.3005996280320873e-06, + "loss": 0.24930328130722046, + "step": 3202 + }, + { + "epoch": 1.5827258124304955, + "grad_norm": 1.299026803187305, + "learning_rate": 2.2953884111007428e-06, + "loss": 0.23712117969989777, + "step": 3203 + }, + { + "epoch": 1.5832200667243297, + "grad_norm": 1.4407443338733177, + "learning_rate": 2.290182337658007e-06, + "loss": 0.2504096031188965, + "step": 3204 + }, + { + "epoch": 1.583714321018164, + "grad_norm": 1.345261370550347, + "learning_rate": 2.2849814111793823e-06, + "loss": 0.2218465358018875, + "step": 3205 + }, + { + "epoch": 1.5842085753119979, + "grad_norm": 1.3818182639369938, + "learning_rate": 2.279785635136933e-06, + "loss": 
0.2653011977672577, + "step": 3206 + }, + { + "epoch": 1.584702829605832, + "grad_norm": 1.521658991035551, + "learning_rate": 2.2745950129992853e-06, + "loss": 0.27551597356796265, + "step": 3207 + }, + { + "epoch": 1.5851970838996663, + "grad_norm": 1.2816405701256748, + "learning_rate": 2.2694095482316247e-06, + "loss": 0.21494519710540771, + "step": 3208 + }, + { + "epoch": 1.5856913381935005, + "grad_norm": 1.2804333364342155, + "learning_rate": 2.2642292442956925e-06, + "loss": 0.2517405152320862, + "step": 3209 + }, + { + "epoch": 1.5861855924873347, + "grad_norm": 1.365131298274178, + "learning_rate": 2.259054104649786e-06, + "loss": 0.25777050852775574, + "step": 3210 + }, + { + "epoch": 1.586679846781169, + "grad_norm": 1.3722239172040558, + "learning_rate": 2.2538841327487582e-06, + "loss": 0.25914469361305237, + "step": 3211 + }, + { + "epoch": 1.5871741010750031, + "grad_norm": 1.3924091851436682, + "learning_rate": 2.2487193320440017e-06, + "loss": 0.23877818882465363, + "step": 3212 + }, + { + "epoch": 1.5876683553688373, + "grad_norm": 1.2757007530985867, + "learning_rate": 2.2435597059834635e-06, + "loss": 0.2226967066526413, + "step": 3213 + }, + { + "epoch": 1.5881626096626715, + "grad_norm": 1.400079876174728, + "learning_rate": 2.2384052580116465e-06, + "loss": 0.28768399357795715, + "step": 3214 + }, + { + "epoch": 1.5886568639565057, + "grad_norm": 1.3700126786923876, + "learning_rate": 2.233255991569575e-06, + "loss": 0.2563883662223816, + "step": 3215 + }, + { + "epoch": 1.58915111825034, + "grad_norm": 1.3688176323163237, + "learning_rate": 2.2281119100948322e-06, + "loss": 0.2595394551753998, + "step": 3216 + }, + { + "epoch": 1.589645372544174, + "grad_norm": 1.2924408591101029, + "learning_rate": 2.2229730170215324e-06, + "loss": 0.2354460060596466, + "step": 3217 + }, + { + "epoch": 1.5901396268380081, + "grad_norm": 1.3015321221613778, + "learning_rate": 2.2178393157803225e-06, + "loss": 0.2397463619709015, + "step": 3218 + }, + { + 
"epoch": 1.5906338811318423, + "grad_norm": 1.5213512082778142, + "learning_rate": 2.212710809798393e-06, + "loss": 0.3304588794708252, + "step": 3219 + }, + { + "epoch": 1.5911281354256765, + "grad_norm": 1.2850321771259765, + "learning_rate": 2.207587502499464e-06, + "loss": 0.23891952633857727, + "step": 3220 + }, + { + "epoch": 1.5916223897195105, + "grad_norm": 1.3962733600240735, + "learning_rate": 2.2024693973037747e-06, + "loss": 0.2544774115085602, + "step": 3221 + }, + { + "epoch": 1.5921166440133447, + "grad_norm": 1.4193118785950918, + "learning_rate": 2.1973564976281003e-06, + "loss": 0.2620859444141388, + "step": 3222 + }, + { + "epoch": 1.592610898307179, + "grad_norm": 1.2794541555838774, + "learning_rate": 2.192248806885747e-06, + "loss": 0.22541281580924988, + "step": 3223 + }, + { + "epoch": 1.5931051526010132, + "grad_norm": 1.2886654382919192, + "learning_rate": 2.187146328486529e-06, + "loss": 0.23454351723194122, + "step": 3224 + }, + { + "epoch": 1.5935994068948474, + "grad_norm": 1.3236984572567387, + "learning_rate": 2.18204906583679e-06, + "loss": 0.24848732352256775, + "step": 3225 + }, + { + "epoch": 1.5940936611886816, + "grad_norm": 1.2049251354008288, + "learning_rate": 2.176957022339389e-06, + "loss": 0.21949590742588043, + "step": 3226 + }, + { + "epoch": 1.5945879154825158, + "grad_norm": 1.3436660246382202, + "learning_rate": 2.171870201393703e-06, + "loss": 0.27300944924354553, + "step": 3227 + }, + { + "epoch": 1.59508216977635, + "grad_norm": 1.3272049247129862, + "learning_rate": 2.16678860639562e-06, + "loss": 0.23850613832473755, + "step": 3228 + }, + { + "epoch": 1.5955764240701842, + "grad_norm": 1.4611172116234663, + "learning_rate": 2.1617122407375424e-06, + "loss": 0.2728792428970337, + "step": 3229 + }, + { + "epoch": 1.5960706783640184, + "grad_norm": 1.4623582491499052, + "learning_rate": 2.1566411078083726e-06, + "loss": 0.2321755588054657, + "step": 3230 + }, + { + "epoch": 1.5965649326578526, + "grad_norm": 
1.5111460470858884, + "learning_rate": 2.1515752109935374e-06, + "loss": 0.30118101835250854, + "step": 3231 + }, + { + "epoch": 1.5970591869516868, + "grad_norm": 1.2041348970592753, + "learning_rate": 2.1465145536749475e-06, + "loss": 0.22317390143871307, + "step": 3232 + }, + { + "epoch": 1.5975534412455208, + "grad_norm": 1.4530812438401597, + "learning_rate": 2.141459139231029e-06, + "loss": 0.2906285524368286, + "step": 3233 + }, + { + "epoch": 1.598047695539355, + "grad_norm": 1.3996891865587815, + "learning_rate": 2.136408971036704e-06, + "loss": 0.24645069241523743, + "step": 3234 + }, + { + "epoch": 1.5985419498331892, + "grad_norm": 1.4725365119055005, + "learning_rate": 2.1313640524633927e-06, + "loss": 0.26764121651649475, + "step": 3235 + }, + { + "epoch": 1.5990362041270234, + "grad_norm": 1.4093554565168636, + "learning_rate": 2.126324386879012e-06, + "loss": 0.2811397910118103, + "step": 3236 + }, + { + "epoch": 1.5995304584208574, + "grad_norm": 1.5104838755570678, + "learning_rate": 2.121289977647971e-06, + "loss": 0.254316508769989, + "step": 3237 + }, + { + "epoch": 1.6000247127146916, + "grad_norm": 1.378432417546232, + "learning_rate": 2.1162608281311636e-06, + "loss": 0.2479352205991745, + "step": 3238 + }, + { + "epoch": 1.6005189670085258, + "grad_norm": 1.200711868039053, + "learning_rate": 2.1112369416859847e-06, + "loss": 0.22767537832260132, + "step": 3239 + }, + { + "epoch": 1.60101322130236, + "grad_norm": 1.3194506024522585, + "learning_rate": 2.106218321666309e-06, + "loss": 0.24286411702632904, + "step": 3240 + }, + { + "epoch": 1.6015074755961942, + "grad_norm": 1.4143801874217299, + "learning_rate": 2.1012049714224914e-06, + "loss": 0.22960595786571503, + "step": 3241 + }, + { + "epoch": 1.6020017298900284, + "grad_norm": 1.3710437918045983, + "learning_rate": 2.0961968943013742e-06, + "loss": 0.2448965162038803, + "step": 3242 + }, + { + "epoch": 1.6024959841838626, + "grad_norm": 1.3544107087641921, + "learning_rate": 
2.0911940936462794e-06, + "loss": 0.23486846685409546, + "step": 3243 + }, + { + "epoch": 1.6029902384776968, + "grad_norm": 1.576868207611872, + "learning_rate": 2.0861965727970045e-06, + "loss": 0.2470572590827942, + "step": 3244 + }, + { + "epoch": 1.603484492771531, + "grad_norm": 1.3371604672673962, + "learning_rate": 2.0812043350898226e-06, + "loss": 0.283765971660614, + "step": 3245 + }, + { + "epoch": 1.6039787470653653, + "grad_norm": 1.3089763025714083, + "learning_rate": 2.076217383857484e-06, + "loss": 0.24943199753761292, + "step": 3246 + }, + { + "epoch": 1.6044730013591995, + "grad_norm": 1.2872721191375163, + "learning_rate": 2.0712357224291966e-06, + "loss": 0.22150146961212158, + "step": 3247 + }, + { + "epoch": 1.6049672556530334, + "grad_norm": 1.3204310548386595, + "learning_rate": 2.0662593541306563e-06, + "loss": 0.2610163390636444, + "step": 3248 + }, + { + "epoch": 1.6054615099468676, + "grad_norm": 1.243779369506435, + "learning_rate": 2.0612882822840154e-06, + "loss": 0.22789397835731506, + "step": 3249 + }, + { + "epoch": 1.6059557642407019, + "grad_norm": 1.3699765130937176, + "learning_rate": 2.056322510207882e-06, + "loss": 0.22956407070159912, + "step": 3250 + }, + { + "epoch": 1.606450018534536, + "grad_norm": 1.3752485526796745, + "learning_rate": 2.051362041217341e-06, + "loss": 0.2579299509525299, + "step": 3251 + }, + { + "epoch": 1.60694427282837, + "grad_norm": 1.449594870075983, + "learning_rate": 2.046406878623929e-06, + "loss": 0.24655218422412872, + "step": 3252 + }, + { + "epoch": 1.6074385271222043, + "grad_norm": 1.8413073723455704, + "learning_rate": 2.0414570257356415e-06, + "loss": 0.2325882464647293, + "step": 3253 + }, + { + "epoch": 1.6079327814160385, + "grad_norm": 1.3704743037638702, + "learning_rate": 2.0365124858569294e-06, + "loss": 0.2678581476211548, + "step": 3254 + }, + { + "epoch": 1.6084270357098727, + "grad_norm": 1.3329052595945479, + "learning_rate": 2.0315732622886976e-06, + "loss": 
0.23200136423110962, + "step": 3255 + }, + { + "epoch": 1.6089212900037069, + "grad_norm": 1.3902434854443921, + "learning_rate": 2.0266393583283015e-06, + "loss": 0.24957536160945892, + "step": 3256 + }, + { + "epoch": 1.609415544297541, + "grad_norm": 1.3655551679458238, + "learning_rate": 2.0217107772695467e-06, + "loss": 0.2506657540798187, + "step": 3257 + }, + { + "epoch": 1.6099097985913753, + "grad_norm": 1.3749666602598227, + "learning_rate": 2.0167875224026788e-06, + "loss": 0.22255182266235352, + "step": 3258 + }, + { + "epoch": 1.6104040528852095, + "grad_norm": 1.222166534445823, + "learning_rate": 2.011869597014392e-06, + "loss": 0.2489611655473709, + "step": 3259 + }, + { + "epoch": 1.6108983071790437, + "grad_norm": 1.3283456479938487, + "learning_rate": 2.0069570043878305e-06, + "loss": 0.24808533489704132, + "step": 3260 + }, + { + "epoch": 1.611392561472878, + "grad_norm": 1.3963788000978605, + "learning_rate": 2.0020497478025635e-06, + "loss": 0.24013441801071167, + "step": 3261 + }, + { + "epoch": 1.6118868157667121, + "grad_norm": 1.4500389435817727, + "learning_rate": 1.997147830534608e-06, + "loss": 0.2813841998577118, + "step": 3262 + }, + { + "epoch": 1.612381070060546, + "grad_norm": 1.4817798618081903, + "learning_rate": 1.9922512558564154e-06, + "loss": 0.23727375268936157, + "step": 3263 + }, + { + "epoch": 1.6128753243543803, + "grad_norm": 1.3341145171932982, + "learning_rate": 1.9873600270368664e-06, + "loss": 0.2341655194759369, + "step": 3264 + }, + { + "epoch": 1.6133695786482145, + "grad_norm": 1.4911949653625025, + "learning_rate": 1.9824741473412768e-06, + "loss": 0.32069963216781616, + "step": 3265 + }, + { + "epoch": 1.6138638329420487, + "grad_norm": 1.4329702924332965, + "learning_rate": 1.977593620031393e-06, + "loss": 0.2414681762456894, + "step": 3266 + }, + { + "epoch": 1.6143580872358827, + "grad_norm": 1.4261430753271709, + "learning_rate": 1.9727184483653793e-06, + "loss": 0.25517842173576355, + "step": 3267 + }, + 
{ + "epoch": 1.614852341529717, + "grad_norm": 1.4664579571771421, + "learning_rate": 1.967848635597831e-06, + "loss": 0.28264889121055603, + "step": 3268 + }, + { + "epoch": 1.6153465958235511, + "grad_norm": 1.2722226456356633, + "learning_rate": 1.962984184979774e-06, + "loss": 0.24543075263500214, + "step": 3269 + }, + { + "epoch": 1.6158408501173853, + "grad_norm": 1.3971489540759634, + "learning_rate": 1.9581250997586366e-06, + "loss": 0.2770763039588928, + "step": 3270 + }, + { + "epoch": 1.6163351044112195, + "grad_norm": 1.362601366326608, + "learning_rate": 1.953271383178278e-06, + "loss": 0.2521423101425171, + "step": 3271 + }, + { + "epoch": 1.6168293587050537, + "grad_norm": 1.262726405313237, + "learning_rate": 1.9484230384789702e-06, + "loss": 0.2402455359697342, + "step": 3272 + }, + { + "epoch": 1.617323612998888, + "grad_norm": 1.4929089203163604, + "learning_rate": 1.9435800688974005e-06, + "loss": 0.2947021424770355, + "step": 3273 + }, + { + "epoch": 1.6178178672927221, + "grad_norm": 1.2382587228414774, + "learning_rate": 1.938742477666663e-06, + "loss": 0.22238701581954956, + "step": 3274 + }, + { + "epoch": 1.6183121215865564, + "grad_norm": 1.2835510888376274, + "learning_rate": 1.933910268016269e-06, + "loss": 0.25475019216537476, + "step": 3275 + }, + { + "epoch": 1.6188063758803906, + "grad_norm": 1.2545564646453307, + "learning_rate": 1.929083443172125e-06, + "loss": 0.2316315472126007, + "step": 3276 + }, + { + "epoch": 1.6193006301742248, + "grad_norm": 1.2739392933893041, + "learning_rate": 1.9242620063565598e-06, + "loss": 0.24977952241897583, + "step": 3277 + }, + { + "epoch": 1.619794884468059, + "grad_norm": 1.3712132397422443, + "learning_rate": 1.9194459607882887e-06, + "loss": 0.24006152153015137, + "step": 3278 + }, + { + "epoch": 1.620289138761893, + "grad_norm": 1.2866259343493134, + "learning_rate": 1.9146353096824366e-06, + "loss": 0.26050522923469543, + "step": 3279 + }, + { + "epoch": 1.6207833930557272, + "grad_norm": 
1.338449999730035, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2698773443698883, + "step": 3280 + }, + { + "epoch": 1.6212776473495614, + "grad_norm": 1.480404977138073, + "learning_rate": 1.9050302037004765e-06, + "loss": 0.2627784013748169, + "step": 3281 + }, + { + "epoch": 1.6217719016433956, + "grad_norm": 1.3335627547093958, + "learning_rate": 1.900235755236599e-06, + "loss": 0.24261148273944855, + "step": 3282 + }, + { + "epoch": 1.6222661559372296, + "grad_norm": 1.347149973540751, + "learning_rate": 1.8954467140596023e-06, + "loss": 0.24689635634422302, + "step": 3283 + }, + { + "epoch": 1.6227604102310638, + "grad_norm": 1.4586477344669697, + "learning_rate": 1.890663083366574e-06, + "loss": 0.2885867953300476, + "step": 3284 + }, + { + "epoch": 1.623254664524898, + "grad_norm": 1.2981242679817548, + "learning_rate": 1.8858848663510066e-06, + "loss": 0.2624407112598419, + "step": 3285 + }, + { + "epoch": 1.6237489188187322, + "grad_norm": 1.4544775837390882, + "learning_rate": 1.881112066202767e-06, + "loss": 0.27705928683280945, + "step": 3286 + }, + { + "epoch": 1.6242431731125664, + "grad_norm": 1.4465119903360202, + "learning_rate": 1.8763446861081058e-06, + "loss": 0.26406094431877136, + "step": 3287 + }, + { + "epoch": 1.6247374274064006, + "grad_norm": 1.3239739188563808, + "learning_rate": 1.8715827292496557e-06, + "loss": 0.26495790481567383, + "step": 3288 + }, + { + "epoch": 1.6252316817002348, + "grad_norm": 1.419298583557058, + "learning_rate": 1.8668261988064406e-06, + "loss": 0.24995195865631104, + "step": 3289 + }, + { + "epoch": 1.625725935994069, + "grad_norm": 1.4058286500391235, + "learning_rate": 1.8620750979538437e-06, + "loss": 0.23043034970760345, + "step": 3290 + }, + { + "epoch": 1.6262201902879032, + "grad_norm": 1.3959905154788135, + "learning_rate": 1.8573294298636334e-06, + "loss": 0.2590731978416443, + "step": 3291 + }, + { + "epoch": 1.6267144445817374, + "grad_norm": 1.3919450960931963, + "learning_rate": 
1.8525891977039557e-06, + "loss": 0.24246811866760254, + "step": 3292 + }, + { + "epoch": 1.6272086988755716, + "grad_norm": 1.2790623939923147, + "learning_rate": 1.847854404639311e-06, + "loss": 0.2386825680732727, + "step": 3293 + }, + { + "epoch": 1.6277029531694056, + "grad_norm": 1.3168324939527787, + "learning_rate": 1.843125053830588e-06, + "loss": 0.2243885099887848, + "step": 3294 + }, + { + "epoch": 1.6281972074632398, + "grad_norm": 1.264397606173487, + "learning_rate": 1.838401148435035e-06, + "loss": 0.24984796345233917, + "step": 3295 + }, + { + "epoch": 1.628691461757074, + "grad_norm": 1.3015406971863621, + "learning_rate": 1.8336826916062568e-06, + "loss": 0.22784638404846191, + "step": 3296 + }, + { + "epoch": 1.6291857160509082, + "grad_norm": 1.4178841831435534, + "learning_rate": 1.828969686494232e-06, + "loss": 0.24812597036361694, + "step": 3297 + }, + { + "epoch": 1.6296799703447422, + "grad_norm": 1.2132930880582795, + "learning_rate": 1.8242621362452939e-06, + "loss": 0.234031543135643, + "step": 3298 + }, + { + "epoch": 1.6301742246385764, + "grad_norm": 1.251471335677166, + "learning_rate": 1.8195600440021377e-06, + "loss": 0.22455371916294098, + "step": 3299 + }, + { + "epoch": 1.6306684789324106, + "grad_norm": 1.2488121980955387, + "learning_rate": 1.8148634129038113e-06, + "loss": 0.22605910897254944, + "step": 3300 + }, + { + "epoch": 1.6311627332262448, + "grad_norm": 1.3484568896035969, + "learning_rate": 1.8101722460857184e-06, + "loss": 0.2527684271335602, + "step": 3301 + }, + { + "epoch": 1.631656987520079, + "grad_norm": 1.355365003110194, + "learning_rate": 1.8054865466796167e-06, + "loss": 0.24625766277313232, + "step": 3302 + }, + { + "epoch": 1.6321512418139132, + "grad_norm": 1.7061568076136007, + "learning_rate": 1.8008063178136125e-06, + "loss": 0.31236231327056885, + "step": 3303 + }, + { + "epoch": 1.6326454961077475, + "grad_norm": 1.3433390649211776, + "learning_rate": 1.7961315626121566e-06, + "loss": 
0.21256005764007568, + "step": 3304 + }, + { + "epoch": 1.6331397504015817, + "grad_norm": 1.5039173087965194, + "learning_rate": 1.7914622841960482e-06, + "loss": 0.25238949060440063, + "step": 3305 + }, + { + "epoch": 1.6336340046954159, + "grad_norm": 1.3709723014330413, + "learning_rate": 1.7867984856824382e-06, + "loss": 0.29630619287490845, + "step": 3306 + }, + { + "epoch": 1.63412825898925, + "grad_norm": 1.3891654533842075, + "learning_rate": 1.782140170184804e-06, + "loss": 0.26159363985061646, + "step": 3307 + }, + { + "epoch": 1.6346225132830843, + "grad_norm": 1.2884457367333761, + "learning_rate": 1.7774873408129733e-06, + "loss": 0.22361448407173157, + "step": 3308 + }, + { + "epoch": 1.6351167675769185, + "grad_norm": 1.410142665529872, + "learning_rate": 1.7728400006731083e-06, + "loss": 0.23890942335128784, + "step": 3309 + }, + { + "epoch": 1.6356110218707525, + "grad_norm": 1.3147986477314286, + "learning_rate": 1.7681981528677073e-06, + "loss": 0.23067504167556763, + "step": 3310 + }, + { + "epoch": 1.6361052761645867, + "grad_norm": 1.4202307068972662, + "learning_rate": 1.7635618004956012e-06, + "loss": 0.24790561199188232, + "step": 3311 + }, + { + "epoch": 1.6365995304584209, + "grad_norm": 1.3183461895569366, + "learning_rate": 1.7589309466519556e-06, + "loss": 0.2590476870536804, + "step": 3312 + }, + { + "epoch": 1.637093784752255, + "grad_norm": 1.4033807602679105, + "learning_rate": 1.754305594428254e-06, + "loss": 0.26833316683769226, + "step": 3313 + }, + { + "epoch": 1.637588039046089, + "grad_norm": 1.2949590395956057, + "learning_rate": 1.749685746912323e-06, + "loss": 0.23390671610832214, + "step": 3314 + }, + { + "epoch": 1.6380822933399233, + "grad_norm": 1.350070481785481, + "learning_rate": 1.7450714071883079e-06, + "loss": 0.2760172188282013, + "step": 3315 + }, + { + "epoch": 1.6385765476337575, + "grad_norm": 1.387338184553767, + "learning_rate": 1.7404625783366703e-06, + "loss": 0.255672812461853, + "step": 3316 + }, + { 
+ "epoch": 1.6390708019275917, + "grad_norm": 1.4073549622144716, + "learning_rate": 1.7358592634342008e-06, + "loss": 0.26336947083473206, + "step": 3317 + }, + { + "epoch": 1.639565056221426, + "grad_norm": 1.2609217918610456, + "learning_rate": 1.7312614655540071e-06, + "loss": 0.2308199107646942, + "step": 3318 + }, + { + "epoch": 1.64005931051526, + "grad_norm": 1.335188741822115, + "learning_rate": 1.7266691877655129e-06, + "loss": 0.24762676656246185, + "step": 3319 + }, + { + "epoch": 1.6405535648090943, + "grad_norm": 1.3287358421539026, + "learning_rate": 1.7220824331344577e-06, + "loss": 0.2175157219171524, + "step": 3320 + }, + { + "epoch": 1.6410478191029285, + "grad_norm": 1.3635707435478155, + "learning_rate": 1.7175012047228956e-06, + "loss": 0.24319039285182953, + "step": 3321 + }, + { + "epoch": 1.6415420733967627, + "grad_norm": 1.2272267263054326, + "learning_rate": 1.7129255055891813e-06, + "loss": 0.21708521246910095, + "step": 3322 + }, + { + "epoch": 1.642036327690597, + "grad_norm": 1.4404881849035673, + "learning_rate": 1.7083553387879969e-06, + "loss": 0.28576910495758057, + "step": 3323 + }, + { + "epoch": 1.6425305819844311, + "grad_norm": 1.3120467826579518, + "learning_rate": 1.703790707370313e-06, + "loss": 0.2664312720298767, + "step": 3324 + }, + { + "epoch": 1.6430248362782651, + "grad_norm": 1.5950926505285568, + "learning_rate": 1.6992316143834142e-06, + "loss": 0.23930951952934265, + "step": 3325 + }, + { + "epoch": 1.6435190905720993, + "grad_norm": 1.3985303284465023, + "learning_rate": 1.694678062870886e-06, + "loss": 0.2741955518722534, + "step": 3326 + }, + { + "epoch": 1.6440133448659335, + "grad_norm": 1.2830935776841221, + "learning_rate": 1.6901300558726142e-06, + "loss": 0.25177690386772156, + "step": 3327 + }, + { + "epoch": 1.6445075991597677, + "grad_norm": 1.4111945712412088, + "learning_rate": 1.6855875964247837e-06, + "loss": 0.26517611742019653, + "step": 3328 + }, + { + "epoch": 1.6450018534536017, + 
"grad_norm": 1.227994601145186, + "learning_rate": 1.6810506875598776e-06, + "loss": 0.2294573187828064, + "step": 3329 + }, + { + "epoch": 1.645496107747436, + "grad_norm": 1.3101987526620804, + "learning_rate": 1.6765193323066653e-06, + "loss": 0.23062998056411743, + "step": 3330 + }, + { + "epoch": 1.6459903620412701, + "grad_norm": 1.4687005380243534, + "learning_rate": 1.6719935336902205e-06, + "loss": 0.3047422468662262, + "step": 3331 + }, + { + "epoch": 1.6464846163351043, + "grad_norm": 1.4214345840675306, + "learning_rate": 1.6674732947319017e-06, + "loss": 0.2715694308280945, + "step": 3332 + }, + { + "epoch": 1.6469788706289386, + "grad_norm": 1.3486732362780178, + "learning_rate": 1.6629586184493519e-06, + "loss": 0.20359721779823303, + "step": 3333 + }, + { + "epoch": 1.6474731249227728, + "grad_norm": 1.256842666883273, + "learning_rate": 1.6584495078565045e-06, + "loss": 0.20083262026309967, + "step": 3334 + }, + { + "epoch": 1.647967379216607, + "grad_norm": 1.2824441486710174, + "learning_rate": 1.6539459659635848e-06, + "loss": 0.2274707555770874, + "step": 3335 + }, + { + "epoch": 1.6484616335104412, + "grad_norm": 1.4170790489583633, + "learning_rate": 1.6494479957770847e-06, + "loss": 0.2654137909412384, + "step": 3336 + }, + { + "epoch": 1.6489558878042754, + "grad_norm": 1.2207871831065553, + "learning_rate": 1.644955600299788e-06, + "loss": 0.24672716856002808, + "step": 3337 + }, + { + "epoch": 1.6494501420981096, + "grad_norm": 2.7206661248050494, + "learning_rate": 1.640468782530753e-06, + "loss": 0.21563802659511566, + "step": 3338 + }, + { + "epoch": 1.6499443963919438, + "grad_norm": 1.2772497258385302, + "learning_rate": 1.6359875454653151e-06, + "loss": 0.22986169159412384, + "step": 3339 + }, + { + "epoch": 1.650438650685778, + "grad_norm": 1.1914212857874291, + "learning_rate": 1.6315118920950857e-06, + "loss": 0.22981731593608856, + "step": 3340 + }, + { + "epoch": 1.650932904979612, + "grad_norm": 1.423180347857553, + 
"learning_rate": 1.6270418254079478e-06, + "loss": 0.25922536849975586, + "step": 3341 + }, + { + "epoch": 1.6514271592734462, + "grad_norm": 1.3808711162643625, + "learning_rate": 1.6225773483880503e-06, + "loss": 0.23273468017578125, + "step": 3342 + }, + { + "epoch": 1.6519214135672804, + "grad_norm": 1.3019728240659525, + "learning_rate": 1.6181184640158165e-06, + "loss": 0.22988896071910858, + "step": 3343 + }, + { + "epoch": 1.6524156678611144, + "grad_norm": 1.3674976753844925, + "learning_rate": 1.6136651752679333e-06, + "loss": 0.2628646790981293, + "step": 3344 + }, + { + "epoch": 1.6529099221549486, + "grad_norm": 1.3498513177046836, + "learning_rate": 1.6092174851173526e-06, + "loss": 0.24670086801052094, + "step": 3345 + }, + { + "epoch": 1.6534041764487828, + "grad_norm": 1.3175919767027275, + "learning_rate": 1.6047753965332902e-06, + "loss": 0.27845436334609985, + "step": 3346 + }, + { + "epoch": 1.653898430742617, + "grad_norm": 1.30200656487082, + "learning_rate": 1.6003389124812185e-06, + "loss": 0.25297483801841736, + "step": 3347 + }, + { + "epoch": 1.6543926850364512, + "grad_norm": 1.237195636484559, + "learning_rate": 1.595908035922873e-06, + "loss": 0.18876859545707703, + "step": 3348 + }, + { + "epoch": 1.6548869393302854, + "grad_norm": 1.3417621492525376, + "learning_rate": 1.591482769816246e-06, + "loss": 0.23852673172950745, + "step": 3349 + }, + { + "epoch": 1.6553811936241196, + "grad_norm": 1.3350614987774176, + "learning_rate": 1.587063117115576e-06, + "loss": 0.2569701373577118, + "step": 3350 + }, + { + "epoch": 1.6558754479179538, + "grad_norm": 1.354350083762125, + "learning_rate": 1.582649080771359e-06, + "loss": 0.29305699467658997, + "step": 3351 + }, + { + "epoch": 1.656369702211788, + "grad_norm": 1.42534989112271, + "learning_rate": 1.5782406637303527e-06, + "loss": 0.28942832350730896, + "step": 3352 + }, + { + "epoch": 1.6568639565056222, + "grad_norm": 1.351062882636418, + "learning_rate": 1.5738378689355439e-06, + 
"loss": 0.27491068840026855, + "step": 3353 + }, + { + "epoch": 1.6573582107994564, + "grad_norm": 1.4736732865815314, + "learning_rate": 1.569440699326179e-06, + "loss": 0.26730844378471375, + "step": 3354 + }, + { + "epoch": 1.6578524650932907, + "grad_norm": 1.3194299490413177, + "learning_rate": 1.5650491578377458e-06, + "loss": 0.23610982298851013, + "step": 3355 + }, + { + "epoch": 1.6583467193871246, + "grad_norm": 1.5894671595119023, + "learning_rate": 1.5606632474019734e-06, + "loss": 0.26817262172698975, + "step": 3356 + }, + { + "epoch": 1.6588409736809588, + "grad_norm": 1.4847304906222882, + "learning_rate": 1.556282970946833e-06, + "loss": 0.2403341382741928, + "step": 3357 + }, + { + "epoch": 1.659335227974793, + "grad_norm": 1.4109665373138245, + "learning_rate": 1.5519083313965378e-06, + "loss": 0.24433058500289917, + "step": 3358 + }, + { + "epoch": 1.6598294822686273, + "grad_norm": 1.2685951523616033, + "learning_rate": 1.5475393316715282e-06, + "loss": 0.2526702582836151, + "step": 3359 + }, + { + "epoch": 1.6603237365624612, + "grad_norm": 1.3373930264060108, + "learning_rate": 1.543175974688491e-06, + "loss": 0.24032334983348846, + "step": 3360 + }, + { + "epoch": 1.6608179908562954, + "grad_norm": 1.3759465001084996, + "learning_rate": 1.5388182633603433e-06, + "loss": 0.27770349383354187, + "step": 3361 + }, + { + "epoch": 1.6613122451501297, + "grad_norm": 1.5590715119269358, + "learning_rate": 1.534466200596224e-06, + "loss": 0.26002752780914307, + "step": 3362 + }, + { + "epoch": 1.6618064994439639, + "grad_norm": 1.867324678142589, + "learning_rate": 1.5301197893015129e-06, + "loss": 0.2707037329673767, + "step": 3363 + }, + { + "epoch": 1.662300753737798, + "grad_norm": 1.3300911116600942, + "learning_rate": 1.52577903237781e-06, + "loss": 0.27249252796173096, + "step": 3364 + }, + { + "epoch": 1.6627950080316323, + "grad_norm": 1.341030721831506, + "learning_rate": 1.5214439327229425e-06, + "loss": 0.22495020925998688, + "step": 3365 
+ }, + { + "epoch": 1.6632892623254665, + "grad_norm": 1.4580410293752506, + "learning_rate": 1.5171144932309622e-06, + "loss": 0.23561973869800568, + "step": 3366 + }, + { + "epoch": 1.6637835166193007, + "grad_norm": 1.4580927261417298, + "learning_rate": 1.512790716792143e-06, + "loss": 0.2689869701862335, + "step": 3367 + }, + { + "epoch": 1.664277770913135, + "grad_norm": 1.2734577307213573, + "learning_rate": 1.5084726062929688e-06, + "loss": 0.22249455749988556, + "step": 3368 + }, + { + "epoch": 1.664772025206969, + "grad_norm": 1.444110335390912, + "learning_rate": 1.5041601646161585e-06, + "loss": 0.24586130678653717, + "step": 3369 + }, + { + "epoch": 1.6652662795008033, + "grad_norm": 1.3250583547488792, + "learning_rate": 1.499853394640629e-06, + "loss": 0.2549409568309784, + "step": 3370 + }, + { + "epoch": 1.6657605337946373, + "grad_norm": 1.4135792596464256, + "learning_rate": 1.4955522992415206e-06, + "loss": 0.2517774999141693, + "step": 3371 + }, + { + "epoch": 1.6662547880884715, + "grad_norm": 1.6132674993246225, + "learning_rate": 1.491256881290184e-06, + "loss": 0.2627662420272827, + "step": 3372 + }, + { + "epoch": 1.6667490423823057, + "grad_norm": 1.250156659660365, + "learning_rate": 1.4869671436541788e-06, + "loss": 0.25203272700309753, + "step": 3373 + }, + { + "epoch": 1.66724329667614, + "grad_norm": 1.3035778741812132, + "learning_rate": 1.482683089197271e-06, + "loss": 0.2206164300441742, + "step": 3374 + }, + { + "epoch": 1.667737550969974, + "grad_norm": 1.4034071560123977, + "learning_rate": 1.4784047207794383e-06, + "loss": 0.2551203966140747, + "step": 3375 + }, + { + "epoch": 1.668231805263808, + "grad_norm": 1.4247468939554981, + "learning_rate": 1.4741320412568505e-06, + "loss": 0.2592264711856842, + "step": 3376 + }, + { + "epoch": 1.6687260595576423, + "grad_norm": 1.3609833066581156, + "learning_rate": 1.4698650534818936e-06, + "loss": 0.25902658700942993, + "step": 3377 + }, + { + "epoch": 1.6692203138514765, + 
"grad_norm": 1.5283083080675575, + "learning_rate": 1.4656037603031491e-06, + "loss": 0.2685459852218628, + "step": 3378 + }, + { + "epoch": 1.6697145681453107, + "grad_norm": 1.2083368696295387, + "learning_rate": 1.4613481645653914e-06, + "loss": 0.21010839939117432, + "step": 3379 + }, + { + "epoch": 1.670208822439145, + "grad_norm": 1.3019618254178054, + "learning_rate": 1.4570982691095925e-06, + "loss": 0.23318082094192505, + "step": 3380 + }, + { + "epoch": 1.6707030767329791, + "grad_norm": 1.346937478273973, + "learning_rate": 1.4528540767729315e-06, + "loss": 0.25045326352119446, + "step": 3381 + }, + { + "epoch": 1.6711973310268133, + "grad_norm": 1.5157571774504706, + "learning_rate": 1.4486155903887623e-06, + "loss": 0.2436288446187973, + "step": 3382 + }, + { + "epoch": 1.6716915853206475, + "grad_norm": 1.2766580343897052, + "learning_rate": 1.444382812786641e-06, + "loss": 0.20454761385917664, + "step": 3383 + }, + { + "epoch": 1.6721858396144818, + "grad_norm": 1.3207693230256567, + "learning_rate": 1.4401557467923089e-06, + "loss": 0.24906963109970093, + "step": 3384 + }, + { + "epoch": 1.672680093908316, + "grad_norm": 1.3391460516330347, + "learning_rate": 1.435934395227695e-06, + "loss": 0.2552015483379364, + "step": 3385 + }, + { + "epoch": 1.6731743482021502, + "grad_norm": 1.3523733680416914, + "learning_rate": 1.4317187609109129e-06, + "loss": 0.2393915057182312, + "step": 3386 + }, + { + "epoch": 1.6736686024959841, + "grad_norm": 1.370539563215592, + "learning_rate": 1.4275088466562625e-06, + "loss": 0.2607477009296417, + "step": 3387 + }, + { + "epoch": 1.6741628567898184, + "grad_norm": 1.3296614147148798, + "learning_rate": 1.423304655274218e-06, + "loss": 0.23722632229328156, + "step": 3388 + }, + { + "epoch": 1.6746571110836526, + "grad_norm": 1.303256653854929, + "learning_rate": 1.4191061895714398e-06, + "loss": 0.2614964246749878, + "step": 3389 + }, + { + "epoch": 1.6751513653774868, + "grad_norm": 1.476448410559568, + 
"learning_rate": 1.4149134523507634e-06, + "loss": 0.2727823555469513, + "step": 3390 + }, + { + "epoch": 1.6756456196713208, + "grad_norm": 1.2739771939884463, + "learning_rate": 1.4107264464112003e-06, + "loss": 0.25176581740379333, + "step": 3391 + }, + { + "epoch": 1.676139873965155, + "grad_norm": 1.3087240197668597, + "learning_rate": 1.4065451745479352e-06, + "loss": 0.21339070796966553, + "step": 3392 + }, + { + "epoch": 1.6766341282589892, + "grad_norm": 1.449069234603101, + "learning_rate": 1.4023696395523267e-06, + "loss": 0.26540419459342957, + "step": 3393 + }, + { + "epoch": 1.6771283825528234, + "grad_norm": 1.3788929945945605, + "learning_rate": 1.3981998442119017e-06, + "loss": 0.2621360421180725, + "step": 3394 + }, + { + "epoch": 1.6776226368466576, + "grad_norm": 1.3149158272362809, + "learning_rate": 1.3940357913103576e-06, + "loss": 0.2578747570514679, + "step": 3395 + }, + { + "epoch": 1.6781168911404918, + "grad_norm": 1.3223117210430684, + "learning_rate": 1.3898774836275531e-06, + "loss": 0.26105010509490967, + "step": 3396 + }, + { + "epoch": 1.678611145434326, + "grad_norm": 1.277709690267506, + "learning_rate": 1.3857249239395143e-06, + "loss": 0.2221919298171997, + "step": 3397 + }, + { + "epoch": 1.6791053997281602, + "grad_norm": 1.3742911888899896, + "learning_rate": 1.3815781150184382e-06, + "loss": 0.2498932033777237, + "step": 3398 + }, + { + "epoch": 1.6795996540219944, + "grad_norm": 1.3631278461436225, + "learning_rate": 1.377437059632668e-06, + "loss": 0.29306796193122864, + "step": 3399 + }, + { + "epoch": 1.6800939083158286, + "grad_norm": 1.41106483401144, + "learning_rate": 1.3733017605467158e-06, + "loss": 0.23804892599582672, + "step": 3400 + }, + { + "epoch": 1.6805881626096628, + "grad_norm": 1.264388446305106, + "learning_rate": 1.3691722205212465e-06, + "loss": 0.18528425693511963, + "step": 3401 + }, + { + "epoch": 1.6810824169034968, + "grad_norm": 1.434400904695952, + "learning_rate": 1.365048442313085e-06, + 
"loss": 0.257534921169281, + "step": 3402 + }, + { + "epoch": 1.681576671197331, + "grad_norm": 1.390183210111369, + "learning_rate": 1.3609304286752034e-06, + "loss": 0.2519993782043457, + "step": 3403 + }, + { + "epoch": 1.6820709254911652, + "grad_norm": 1.5041703905686798, + "learning_rate": 1.3568181823567328e-06, + "loss": 0.27830445766448975, + "step": 3404 + }, + { + "epoch": 1.6825651797849994, + "grad_norm": 1.3496130761993563, + "learning_rate": 1.3527117061029438e-06, + "loss": 0.22532883286476135, + "step": 3405 + }, + { + "epoch": 1.6830594340788334, + "grad_norm": 1.3484913124474047, + "learning_rate": 1.3486110026552668e-06, + "loss": 0.23230011761188507, + "step": 3406 + }, + { + "epoch": 1.6835536883726676, + "grad_norm": 1.320791018685261, + "learning_rate": 1.3445160747512743e-06, + "loss": 0.24105653166770935, + "step": 3407 + }, + { + "epoch": 1.6840479426665018, + "grad_norm": 1.5077644423875391, + "learning_rate": 1.340426925124676e-06, + "loss": 0.2946394681930542, + "step": 3408 + }, + { + "epoch": 1.684542196960336, + "grad_norm": 1.403422513607122, + "learning_rate": 1.3363435565053319e-06, + "loss": 0.2682989239692688, + "step": 3409 + }, + { + "epoch": 1.6850364512541702, + "grad_norm": 1.3363195283881322, + "learning_rate": 1.332265971619241e-06, + "loss": 0.2219456285238266, + "step": 3410 + }, + { + "epoch": 1.6855307055480044, + "grad_norm": 1.2440577869208935, + "learning_rate": 1.3281941731885396e-06, + "loss": 0.22532151639461517, + "step": 3411 + }, + { + "epoch": 1.6860249598418386, + "grad_norm": 1.3951142777226702, + "learning_rate": 1.324128163931504e-06, + "loss": 0.24166807532310486, + "step": 3412 + }, + { + "epoch": 1.6865192141356729, + "grad_norm": 1.8803758040895027, + "learning_rate": 1.3200679465625453e-06, + "loss": 0.25514671206474304, + "step": 3413 + }, + { + "epoch": 1.687013468429507, + "grad_norm": 1.4161288294493581, + "learning_rate": 1.3160135237922011e-06, + "loss": 0.263123482465744, + "step": 3414 + }, 
+ { + "epoch": 1.6875077227233413, + "grad_norm": 1.3692510048196695, + "learning_rate": 1.3119648983271527e-06, + "loss": 0.23763976991176605, + "step": 3415 + }, + { + "epoch": 1.6880019770171755, + "grad_norm": 1.4514594135261416, + "learning_rate": 1.3079220728701991e-06, + "loss": 0.28645598888397217, + "step": 3416 + }, + { + "epoch": 1.6884962313110097, + "grad_norm": 1.3145652794970974, + "learning_rate": 1.303885050120275e-06, + "loss": 0.2269624024629593, + "step": 3417 + }, + { + "epoch": 1.6889904856048437, + "grad_norm": 1.2380861054344243, + "learning_rate": 1.2998538327724386e-06, + "loss": 0.23601466417312622, + "step": 3418 + }, + { + "epoch": 1.6894847398986779, + "grad_norm": 1.4253359182592056, + "learning_rate": 1.2958284235178743e-06, + "loss": 0.2246169149875641, + "step": 3419 + }, + { + "epoch": 1.689978994192512, + "grad_norm": 1.497489718348998, + "learning_rate": 1.2918088250438865e-06, + "loss": 0.26519715785980225, + "step": 3420 + }, + { + "epoch": 1.6904732484863463, + "grad_norm": 1.443915314302877, + "learning_rate": 1.2877950400339046e-06, + "loss": 0.2590267062187195, + "step": 3421 + }, + { + "epoch": 1.6909675027801803, + "grad_norm": 1.3941822393799335, + "learning_rate": 1.2837870711674672e-06, + "loss": 0.2535945773124695, + "step": 3422 + }, + { + "epoch": 1.6914617570740145, + "grad_norm": 1.3833358145204437, + "learning_rate": 1.279784921120244e-06, + "loss": 0.21907874941825867, + "step": 3423 + }, + { + "epoch": 1.6919560113678487, + "grad_norm": 1.3775789573220893, + "learning_rate": 1.2757885925640124e-06, + "loss": 0.23314553499221802, + "step": 3424 + }, + { + "epoch": 1.6924502656616829, + "grad_norm": 1.2335650824399806, + "learning_rate": 1.2717980881666615e-06, + "loss": 0.2288433313369751, + "step": 3425 + }, + { + "epoch": 1.692944519955517, + "grad_norm": 1.3218922014839134, + "learning_rate": 1.2678134105921924e-06, + "loss": 0.2285449206829071, + "step": 3426 + }, + { + "epoch": 1.6934387742493513, + 
"grad_norm": 1.4061495134031399, + "learning_rate": 1.2638345625007287e-06, + "loss": 0.2898653447628021, + "step": 3427 + }, + { + "epoch": 1.6939330285431855, + "grad_norm": 1.3140964049835469, + "learning_rate": 1.2598615465484831e-06, + "loss": 0.23574519157409668, + "step": 3428 + }, + { + "epoch": 1.6944272828370197, + "grad_norm": 1.8163323929078987, + "learning_rate": 1.2558943653877887e-06, + "loss": 0.23385417461395264, + "step": 3429 + }, + { + "epoch": 1.694921537130854, + "grad_norm": 1.4332956021988026, + "learning_rate": 1.2519330216670766e-06, + "loss": 0.2555482089519501, + "step": 3430 + }, + { + "epoch": 1.6954157914246881, + "grad_norm": 1.3005186125236943, + "learning_rate": 1.247977518030885e-06, + "loss": 0.22221535444259644, + "step": 3431 + }, + { + "epoch": 1.6959100457185223, + "grad_norm": 1.2645213358789251, + "learning_rate": 1.2440278571198516e-06, + "loss": 0.21753090620040894, + "step": 3432 + }, + { + "epoch": 1.6964043000123563, + "grad_norm": 1.3199124302473737, + "learning_rate": 1.240084041570716e-06, + "loss": 0.2352944314479828, + "step": 3433 + }, + { + "epoch": 1.6968985543061905, + "grad_norm": 1.3019158889354874, + "learning_rate": 1.2361460740163045e-06, + "loss": 0.22581814229488373, + "step": 3434 + }, + { + "epoch": 1.6973928086000247, + "grad_norm": 1.5051457985045136, + "learning_rate": 1.2322139570855596e-06, + "loss": 0.28703421354293823, + "step": 3435 + }, + { + "epoch": 1.697887062893859, + "grad_norm": 1.2466294121854475, + "learning_rate": 1.2282876934034972e-06, + "loss": 0.21528789401054382, + "step": 3436 + }, + { + "epoch": 1.698381317187693, + "grad_norm": 1.3714652202926056, + "learning_rate": 1.2243672855912393e-06, + "loss": 0.2675422430038452, + "step": 3437 + }, + { + "epoch": 1.6988755714815271, + "grad_norm": 1.4468798550658835, + "learning_rate": 1.2204527362659913e-06, + "loss": 0.26681527495384216, + "step": 3438 + }, + { + "epoch": 1.6993698257753613, + "grad_norm": 1.6692863707132455, + 
"learning_rate": 1.216544048041054e-06, + "loss": 0.2436470091342926, + "step": 3439 + }, + { + "epoch": 1.6998640800691955, + "grad_norm": 1.3471564011899657, + "learning_rate": 1.212641223525809e-06, + "loss": 0.25458425283432007, + "step": 3440 + }, + { + "epoch": 1.7003583343630297, + "grad_norm": 1.5076141037655715, + "learning_rate": 1.2087442653257286e-06, + "loss": 0.24890559911727905, + "step": 3441 + }, + { + "epoch": 1.700852588656864, + "grad_norm": 1.2935321774740525, + "learning_rate": 1.2048531760423642e-06, + "loss": 0.26031816005706787, + "step": 3442 + }, + { + "epoch": 1.7013468429506982, + "grad_norm": 1.2852726465517723, + "learning_rate": 1.200967958273349e-06, + "loss": 0.22184975445270538, + "step": 3443 + }, + { + "epoch": 1.7018410972445324, + "grad_norm": 1.4055101079653758, + "learning_rate": 1.1970886146124073e-06, + "loss": 0.2670953571796417, + "step": 3444 + }, + { + "epoch": 1.7023353515383666, + "grad_norm": 1.4509425159233789, + "learning_rate": 1.1932151476493247e-06, + "loss": 0.27950525283813477, + "step": 3445 + }, + { + "epoch": 1.7028296058322008, + "grad_norm": 1.177838308027136, + "learning_rate": 1.1893475599699766e-06, + "loss": 0.23257380723953247, + "step": 3446 + }, + { + "epoch": 1.703323860126035, + "grad_norm": 1.33833163811184, + "learning_rate": 1.1854858541563086e-06, + "loss": 0.2586575746536255, + "step": 3447 + }, + { + "epoch": 1.703818114419869, + "grad_norm": 1.4079485154063143, + "learning_rate": 1.1816300327863406e-06, + "loss": 0.2677457928657532, + "step": 3448 + }, + { + "epoch": 1.7043123687137032, + "grad_norm": 1.565618455451115, + "learning_rate": 1.1777800984341637e-06, + "loss": 0.29866284132003784, + "step": 3449 + }, + { + "epoch": 1.7048066230075374, + "grad_norm": 1.3858480302164131, + "learning_rate": 1.1739360536699397e-06, + "loss": 0.27279675006866455, + "step": 3450 + }, + { + "epoch": 1.7053008773013716, + "grad_norm": 1.4265301971817403, + "learning_rate": 1.1700979010598945e-06, + 
"loss": 0.25695672631263733, + "step": 3451 + }, + { + "epoch": 1.7057951315952056, + "grad_norm": 1.2548676263466874, + "learning_rate": 1.1662656431663278e-06, + "loss": 0.22578787803649902, + "step": 3452 + }, + { + "epoch": 1.7062893858890398, + "grad_norm": 1.2884557931863843, + "learning_rate": 1.1624392825476016e-06, + "loss": 0.1946491301059723, + "step": 3453 + }, + { + "epoch": 1.706783640182874, + "grad_norm": 1.7214838792794764, + "learning_rate": 1.158618821758134e-06, + "loss": 0.2099667191505432, + "step": 3454 + }, + { + "epoch": 1.7072778944767082, + "grad_norm": 1.3956932051100446, + "learning_rate": 1.1548042633484148e-06, + "loss": 0.22660428285598755, + "step": 3455 + }, + { + "epoch": 1.7077721487705424, + "grad_norm": 1.486801447510752, + "learning_rate": 1.1509956098649855e-06, + "loss": 0.27378255128860474, + "step": 3456 + }, + { + "epoch": 1.7082664030643766, + "grad_norm": 1.3265929348116055, + "learning_rate": 1.1471928638504504e-06, + "loss": 0.2209164947271347, + "step": 3457 + }, + { + "epoch": 1.7087606573582108, + "grad_norm": 1.4225246621575494, + "learning_rate": 1.1433960278434687e-06, + "loss": 0.24310322105884552, + "step": 3458 + }, + { + "epoch": 1.709254911652045, + "grad_norm": 1.408175906725771, + "learning_rate": 1.1396051043787526e-06, + "loss": 0.23209068179130554, + "step": 3459 + }, + { + "epoch": 1.7097491659458792, + "grad_norm": 1.3815567972930465, + "learning_rate": 1.1358200959870703e-06, + "loss": 0.2514454126358032, + "step": 3460 + }, + { + "epoch": 1.7102434202397134, + "grad_norm": 1.4417631759146625, + "learning_rate": 1.132041005195239e-06, + "loss": 0.2580721378326416, + "step": 3461 + }, + { + "epoch": 1.7107376745335476, + "grad_norm": 1.3709268368925525, + "learning_rate": 1.1282678345261234e-06, + "loss": 0.26388949155807495, + "step": 3462 + }, + { + "epoch": 1.7112319288273818, + "grad_norm": 1.2783952905855267, + "learning_rate": 1.1245005864986402e-06, + "loss": 0.2194654643535614, + "step": 3463 
+ }, + { + "epoch": 1.7117261831212158, + "grad_norm": 1.2633121407835717, + "learning_rate": 1.1207392636277502e-06, + "loss": 0.2048814296722412, + "step": 3464 + }, + { + "epoch": 1.71222043741505, + "grad_norm": 1.33926020269927, + "learning_rate": 1.1169838684244584e-06, + "loss": 0.24165832996368408, + "step": 3465 + }, + { + "epoch": 1.7127146917088842, + "grad_norm": 1.3906329052137327, + "learning_rate": 1.1132344033958132e-06, + "loss": 0.2484482377767563, + "step": 3466 + }, + { + "epoch": 1.7132089460027184, + "grad_norm": 1.4564028814853938, + "learning_rate": 1.1094908710449048e-06, + "loss": 0.2406741827726364, + "step": 3467 + }, + { + "epoch": 1.7137032002965524, + "grad_norm": 1.4018531611252434, + "learning_rate": 1.1057532738708588e-06, + "loss": 0.2417721152305603, + "step": 3468 + }, + { + "epoch": 1.7141974545903866, + "grad_norm": 1.4560734194910743, + "learning_rate": 1.1020216143688446e-06, + "loss": 0.26304543018341064, + "step": 3469 + }, + { + "epoch": 1.7146917088842208, + "grad_norm": 1.476031518585943, + "learning_rate": 1.098295895030066e-06, + "loss": 0.30013689398765564, + "step": 3470 + }, + { + "epoch": 1.715185963178055, + "grad_norm": 1.3175345714713855, + "learning_rate": 1.0945761183417569e-06, + "loss": 0.21451817452907562, + "step": 3471 + }, + { + "epoch": 1.7156802174718893, + "grad_norm": 1.3300365419760627, + "learning_rate": 1.0908622867871854e-06, + "loss": 0.235377699136734, + "step": 3472 + }, + { + "epoch": 1.7161744717657235, + "grad_norm": 1.2866674867130445, + "learning_rate": 1.0871544028456594e-06, + "loss": 0.23560425639152527, + "step": 3473 + }, + { + "epoch": 1.7166687260595577, + "grad_norm": 1.3385949926310057, + "learning_rate": 1.083452468992503e-06, + "loss": 0.2431229054927826, + "step": 3474 + }, + { + "epoch": 1.7171629803533919, + "grad_norm": 1.2089508133597444, + "learning_rate": 1.0797564876990762e-06, + "loss": 0.211553692817688, + "step": 3475 + }, + { + "epoch": 1.717657234647226, + 
"grad_norm": 1.3533177183735723, + "learning_rate": 1.0760664614327643e-06, + "loss": 0.23565953969955444, + "step": 3476 + }, + { + "epoch": 1.7181514889410603, + "grad_norm": 1.328162178864468, + "learning_rate": 1.0723823926569744e-06, + "loss": 0.2052966058254242, + "step": 3477 + }, + { + "epoch": 1.7186457432348945, + "grad_norm": 1.3067945675468369, + "learning_rate": 1.06870428383114e-06, + "loss": 0.24831204116344452, + "step": 3478 + }, + { + "epoch": 1.7191399975287285, + "grad_norm": 1.273169118321956, + "learning_rate": 1.0650321374107142e-06, + "loss": 0.24706462025642395, + "step": 3479 + }, + { + "epoch": 1.7196342518225627, + "grad_norm": 1.4211234189057285, + "learning_rate": 1.0613659558471644e-06, + "loss": 0.20845818519592285, + "step": 3480 + }, + { + "epoch": 1.720128506116397, + "grad_norm": 1.2323642708024432, + "learning_rate": 1.0577057415879887e-06, + "loss": 0.21599797904491425, + "step": 3481 + }, + { + "epoch": 1.720622760410231, + "grad_norm": 1.4618240857831881, + "learning_rate": 1.054051497076689e-06, + "loss": 0.2381049394607544, + "step": 3482 + }, + { + "epoch": 1.721117014704065, + "grad_norm": 1.3155008449637104, + "learning_rate": 1.0504032247527874e-06, + "loss": 0.22402817010879517, + "step": 3483 + }, + { + "epoch": 1.7216112689978993, + "grad_norm": 1.5409902580545625, + "learning_rate": 1.0467609270518186e-06, + "loss": 0.24406251311302185, + "step": 3484 + }, + { + "epoch": 1.7221055232917335, + "grad_norm": 1.339222294791023, + "learning_rate": 1.0431246064053291e-06, + "loss": 0.24388936161994934, + "step": 3485 + }, + { + "epoch": 1.7225997775855677, + "grad_norm": 1.3265412686691833, + "learning_rate": 1.0394942652408735e-06, + "loss": 0.26131671667099, + "step": 3486 + }, + { + "epoch": 1.723094031879402, + "grad_norm": 1.3718768259485188, + "learning_rate": 1.0358699059820188e-06, + "loss": 0.247392475605011, + "step": 3487 + }, + { + "epoch": 1.7235882861732361, + "grad_norm": 1.335920284358623, + 
"learning_rate": 1.0322515310483316e-06, + "loss": 0.22713768482208252, + "step": 3488 + }, + { + "epoch": 1.7240825404670703, + "grad_norm": 1.3821197244420464, + "learning_rate": 1.0286391428553854e-06, + "loss": 0.2544357180595398, + "step": 3489 + }, + { + "epoch": 1.7245767947609045, + "grad_norm": 1.260460911336476, + "learning_rate": 1.0250327438147678e-06, + "loss": 0.23186656832695007, + "step": 3490 + }, + { + "epoch": 1.7250710490547387, + "grad_norm": 1.1804266448755296, + "learning_rate": 1.0214323363340506e-06, + "loss": 0.20387035608291626, + "step": 3491 + }, + { + "epoch": 1.725565303348573, + "grad_norm": 1.4265943405789598, + "learning_rate": 1.017837922816819e-06, + "loss": 0.25391846895217896, + "step": 3492 + }, + { + "epoch": 1.7260595576424071, + "grad_norm": 1.2603447890118837, + "learning_rate": 1.014249505662649e-06, + "loss": 0.23214812576770782, + "step": 3493 + }, + { + "epoch": 1.7265538119362414, + "grad_norm": 1.5899981641866812, + "learning_rate": 1.0106670872671187e-06, + "loss": 0.31888365745544434, + "step": 3494 + }, + { + "epoch": 1.7270480662300753, + "grad_norm": 1.2907611357867346, + "learning_rate": 1.0070906700217998e-06, + "loss": 0.23372362554073334, + "step": 3495 + }, + { + "epoch": 1.7275423205239095, + "grad_norm": 1.2449017093435057, + "learning_rate": 1.0035202563142577e-06, + "loss": 0.20082907378673553, + "step": 3496 + }, + { + "epoch": 1.7280365748177438, + "grad_norm": 1.3171397747083256, + "learning_rate": 9.99955848528046e-07, + "loss": 0.23895825445652008, + "step": 3497 + }, + { + "epoch": 1.728530829111578, + "grad_norm": 1.4142591511055072, + "learning_rate": 9.963974490427153e-07, + "loss": 0.30089694261550903, + "step": 3498 + }, + { + "epoch": 1.729025083405412, + "grad_norm": 1.4071492496267155, + "learning_rate": 9.928450602338046e-07, + "loss": 0.28134891390800476, + "step": 3499 + }, + { + "epoch": 1.7295193376992462, + "grad_norm": 1.239666390023503, + "learning_rate": 9.892986844728325e-07, + 
"loss": 0.1947125792503357, + "step": 3500 + }, + { + "epoch": 1.7300135919930804, + "grad_norm": 1.2560350647671819, + "learning_rate": 9.857583241273116e-07, + "loss": 0.252549409866333, + "step": 3501 + }, + { + "epoch": 1.7305078462869146, + "grad_norm": 1.8080125735095465, + "learning_rate": 9.82223981560736e-07, + "loss": 0.28061211109161377, + "step": 3502 + }, + { + "epoch": 1.7310021005807488, + "grad_norm": 1.3465400182463805, + "learning_rate": 9.786956591325813e-07, + "loss": 0.2492327094078064, + "step": 3503 + }, + { + "epoch": 1.731496354874583, + "grad_norm": 1.3114105920039891, + "learning_rate": 9.75173359198307e-07, + "loss": 0.20470373332500458, + "step": 3504 + }, + { + "epoch": 1.7319906091684172, + "grad_norm": 1.4582343704980485, + "learning_rate": 9.716570841093476e-07, + "loss": 0.24190351366996765, + "step": 3505 + }, + { + "epoch": 1.7324848634622514, + "grad_norm": 1.3916465638756335, + "learning_rate": 9.681468362131209e-07, + "loss": 0.28784725069999695, + "step": 3506 + }, + { + "epoch": 1.7329791177560856, + "grad_norm": 1.4872057430892556, + "learning_rate": 9.646426178530176e-07, + "loss": 0.2676560878753662, + "step": 3507 + }, + { + "epoch": 1.7334733720499198, + "grad_norm": 1.4118374661566944, + "learning_rate": 9.611444313684027e-07, + "loss": 0.2493928223848343, + "step": 3508 + }, + { + "epoch": 1.733967626343754, + "grad_norm": 1.272854491876895, + "learning_rate": 9.57652279094613e-07, + "loss": 0.23272472620010376, + "step": 3509 + }, + { + "epoch": 1.734461880637588, + "grad_norm": 1.3295460481124186, + "learning_rate": 9.541661633629662e-07, + "loss": 0.23245804011821747, + "step": 3510 + }, + { + "epoch": 1.7349561349314222, + "grad_norm": 1.318916212284511, + "learning_rate": 9.506860865007373e-07, + "loss": 0.22367024421691895, + "step": 3511 + }, + { + "epoch": 1.7354503892252564, + "grad_norm": 1.312738075120818, + "learning_rate": 9.472120508311788e-07, + "loss": 0.22332677245140076, + "step": 3512 + }, + { + 
"epoch": 1.7359446435190906, + "grad_norm": 1.3669711817276102, + "learning_rate": 9.437440586735081e-07, + "loss": 0.28051453828811646, + "step": 3513 + }, + { + "epoch": 1.7364388978129246, + "grad_norm": 1.5089189064457602, + "learning_rate": 9.402821123429017e-07, + "loss": 0.24815741181373596, + "step": 3514 + }, + { + "epoch": 1.7369331521067588, + "grad_norm": 1.338757796188803, + "learning_rate": 9.368262141505114e-07, + "loss": 0.24077603220939636, + "step": 3515 + }, + { + "epoch": 1.737427406400593, + "grad_norm": 1.419717776508751, + "learning_rate": 9.333763664034457e-07, + "loss": 0.24596062302589417, + "step": 3516 + }, + { + "epoch": 1.7379216606944272, + "grad_norm": 1.3347588363810814, + "learning_rate": 9.299325714047702e-07, + "loss": 0.22939634323120117, + "step": 3517 + }, + { + "epoch": 1.7384159149882614, + "grad_norm": 1.446015117761441, + "learning_rate": 9.264948314535116e-07, + "loss": 0.24870653450489044, + "step": 3518 + }, + { + "epoch": 1.7389101692820956, + "grad_norm": 1.2985600743859553, + "learning_rate": 9.23063148844664e-07, + "loss": 0.24589623510837555, + "step": 3519 + }, + { + "epoch": 1.7394044235759298, + "grad_norm": 1.3138002527909343, + "learning_rate": 9.196375258691615e-07, + "loss": 0.24228474497795105, + "step": 3520 + }, + { + "epoch": 1.739898677869764, + "grad_norm": 1.274631487561465, + "learning_rate": 9.162179648139047e-07, + "loss": 0.24371150135993958, + "step": 3521 + }, + { + "epoch": 1.7403929321635982, + "grad_norm": 1.279720023026326, + "learning_rate": 9.128044679617432e-07, + "loss": 0.24775750935077667, + "step": 3522 + }, + { + "epoch": 1.7408871864574325, + "grad_norm": 1.5257492514284694, + "learning_rate": 9.093970375914784e-07, + "loss": 0.2893243432044983, + "step": 3523 + }, + { + "epoch": 1.7413814407512667, + "grad_norm": 1.300861064044251, + "learning_rate": 9.059956759778632e-07, + "loss": 0.24014830589294434, + "step": 3524 + }, + { + "epoch": 1.7418756950451009, + "grad_norm": 
1.399249837900177, + "learning_rate": 9.026003853915977e-07, + "loss": 0.21439003944396973, + "step": 3525 + }, + { + "epoch": 1.7423699493389349, + "grad_norm": 1.3253623378225632, + "learning_rate": 8.992111680993265e-07, + "loss": 0.23376847803592682, + "step": 3526 + }, + { + "epoch": 1.742864203632769, + "grad_norm": 1.3914877634645069, + "learning_rate": 8.958280263636487e-07, + "loss": 0.244795560836792, + "step": 3527 + }, + { + "epoch": 1.7433584579266033, + "grad_norm": 1.3847661327530765, + "learning_rate": 8.924509624430955e-07, + "loss": 0.2513751685619354, + "step": 3528 + }, + { + "epoch": 1.7438527122204373, + "grad_norm": 1.3808839230401615, + "learning_rate": 8.890799785921478e-07, + "loss": 0.2118893414735794, + "step": 3529 + }, + { + "epoch": 1.7443469665142715, + "grad_norm": 1.4606627623109902, + "learning_rate": 8.857150770612288e-07, + "loss": 0.2834109365940094, + "step": 3530 + }, + { + "epoch": 1.7448412208081057, + "grad_norm": 1.3959930901293698, + "learning_rate": 8.823562600966962e-07, + "loss": 0.2546151876449585, + "step": 3531 + }, + { + "epoch": 1.7453354751019399, + "grad_norm": 1.3410984246991777, + "learning_rate": 8.790035299408494e-07, + "loss": 0.2654607594013214, + "step": 3532 + }, + { + "epoch": 1.745829729395774, + "grad_norm": 1.4773453802832905, + "learning_rate": 8.756568888319239e-07, + "loss": 0.2720295786857605, + "step": 3533 + }, + { + "epoch": 1.7463239836896083, + "grad_norm": 1.3341271298777078, + "learning_rate": 8.723163390040856e-07, + "loss": 0.22259725630283356, + "step": 3534 + }, + { + "epoch": 1.7468182379834425, + "grad_norm": 1.3952830917524783, + "learning_rate": 8.68981882687443e-07, + "loss": 0.22918277978897095, + "step": 3535 + }, + { + "epoch": 1.7473124922772767, + "grad_norm": 1.4553860122555766, + "learning_rate": 8.656535221080297e-07, + "loss": 0.24396009743213654, + "step": 3536 + }, + { + "epoch": 1.747806746571111, + "grad_norm": 1.4530449395488945, + "learning_rate": 
8.623312594878097e-07, + "loss": 0.2370900958776474, + "step": 3537 + }, + { + "epoch": 1.748301000864945, + "grad_norm": 1.4353409191789361, + "learning_rate": 8.590150970446798e-07, + "loss": 0.2785671055316925, + "step": 3538 + }, + { + "epoch": 1.7487952551587793, + "grad_norm": 1.3531168663907844, + "learning_rate": 8.557050369924624e-07, + "loss": 0.29365241527557373, + "step": 3539 + }, + { + "epoch": 1.7492895094526135, + "grad_norm": 1.3579124483240532, + "learning_rate": 8.524010815409068e-07, + "loss": 0.24052876234054565, + "step": 3540 + }, + { + "epoch": 1.7497837637464475, + "grad_norm": 1.5096531715278536, + "learning_rate": 8.49103232895685e-07, + "loss": 0.23938694596290588, + "step": 3541 + }, + { + "epoch": 1.7502780180402817, + "grad_norm": 1.2842245856075563, + "learning_rate": 8.458114932583961e-07, + "loss": 0.2244144231081009, + "step": 3542 + }, + { + "epoch": 1.750772272334116, + "grad_norm": 1.4659940645429403, + "learning_rate": 8.425258648265544e-07, + "loss": 0.25028878450393677, + "step": 3543 + }, + { + "epoch": 1.7512665266279501, + "grad_norm": 1.414718407414415, + "learning_rate": 8.39246349793602e-07, + "loss": 0.23135274648666382, + "step": 3544 + }, + { + "epoch": 1.751760780921784, + "grad_norm": 1.3004631081596045, + "learning_rate": 8.359729503488967e-07, + "loss": 0.23874548077583313, + "step": 3545 + }, + { + "epoch": 1.7522550352156183, + "grad_norm": 1.4912661633646227, + "learning_rate": 8.327056686777102e-07, + "loss": 0.2780659794807434, + "step": 3546 + }, + { + "epoch": 1.7527492895094525, + "grad_norm": 1.3424848463452685, + "learning_rate": 8.294445069612356e-07, + "loss": 0.213335320353508, + "step": 3547 + }, + { + "epoch": 1.7532435438032867, + "grad_norm": 1.3764395925344186, + "learning_rate": 8.261894673765757e-07, + "loss": 0.23284730315208435, + "step": 3548 + }, + { + "epoch": 1.753737798097121, + "grad_norm": 1.4152912967440003, + "learning_rate": 8.229405520967504e-07, + "loss": 0.25429633259773254, + 
"step": 3549 + }, + { + "epoch": 1.7542320523909551, + "grad_norm": 1.42166486412748, + "learning_rate": 8.196977632906877e-07, + "loss": 0.2519379258155823, + "step": 3550 + }, + { + "epoch": 1.7547263066847893, + "grad_norm": 1.3397514660513317, + "learning_rate": 8.164611031232283e-07, + "loss": 0.2510948181152344, + "step": 3551 + }, + { + "epoch": 1.7552205609786236, + "grad_norm": 1.4391737307664527, + "learning_rate": 8.132305737551193e-07, + "loss": 0.27415433526039124, + "step": 3552 + }, + { + "epoch": 1.7557148152724578, + "grad_norm": 1.4503824956137814, + "learning_rate": 8.100061773430179e-07, + "loss": 0.26723912358283997, + "step": 3553 + }, + { + "epoch": 1.756209069566292, + "grad_norm": 1.3305646078685684, + "learning_rate": 8.067879160394821e-07, + "loss": 0.2710701823234558, + "step": 3554 + }, + { + "epoch": 1.7567033238601262, + "grad_norm": 1.2981752509304552, + "learning_rate": 8.035757919929765e-07, + "loss": 0.23247234523296356, + "step": 3555 + }, + { + "epoch": 1.7571975781539602, + "grad_norm": 1.3788336069912301, + "learning_rate": 8.003698073478749e-07, + "loss": 0.2514559328556061, + "step": 3556 + }, + { + "epoch": 1.7576918324477944, + "grad_norm": 1.2669691261364102, + "learning_rate": 7.971699642444419e-07, + "loss": 0.23549199104309082, + "step": 3557 + }, + { + "epoch": 1.7581860867416286, + "grad_norm": 1.326325870924157, + "learning_rate": 7.939762648188476e-07, + "loss": 0.24511446058750153, + "step": 3558 + }, + { + "epoch": 1.7586803410354628, + "grad_norm": 1.244030857989509, + "learning_rate": 7.907887112031609e-07, + "loss": 0.18705075979232788, + "step": 3559 + }, + { + "epoch": 1.7591745953292968, + "grad_norm": 1.3163815425830492, + "learning_rate": 7.876073055253474e-07, + "loss": 0.24297048151493073, + "step": 3560 + }, + { + "epoch": 1.759668849623131, + "grad_norm": 1.3886968971610452, + "learning_rate": 7.844320499092683e-07, + "loss": 0.239119753241539, + "step": 3561 + }, + { + "epoch": 1.7601631039169652, + 
"grad_norm": 1.3716161630664097, + "learning_rate": 7.81262946474679e-07, + "loss": 0.2430122196674347, + "step": 3562 + }, + { + "epoch": 1.7606573582107994, + "grad_norm": 1.5018987096099226, + "learning_rate": 7.78099997337225e-07, + "loss": 0.2785049378871918, + "step": 3563 + }, + { + "epoch": 1.7611516125046336, + "grad_norm": 1.324774124882076, + "learning_rate": 7.749432046084471e-07, + "loss": 0.2451494038105011, + "step": 3564 + }, + { + "epoch": 1.7616458667984678, + "grad_norm": 1.2759037312949375, + "learning_rate": 7.717925703957785e-07, + "loss": 0.20071648061275482, + "step": 3565 + }, + { + "epoch": 1.762140121092302, + "grad_norm": 1.265455917769001, + "learning_rate": 7.686480968025333e-07, + "loss": 0.22308245301246643, + "step": 3566 + }, + { + "epoch": 1.7626343753861362, + "grad_norm": 1.4753453520092665, + "learning_rate": 7.655097859279192e-07, + "loss": 0.26082009077072144, + "step": 3567 + }, + { + "epoch": 1.7631286296799704, + "grad_norm": 1.2035646972809244, + "learning_rate": 7.623776398670268e-07, + "loss": 0.21026611328125, + "step": 3568 + }, + { + "epoch": 1.7636228839738046, + "grad_norm": 1.3616311603644673, + "learning_rate": 7.592516607108324e-07, + "loss": 0.23878465592861176, + "step": 3569 + }, + { + "epoch": 1.7641171382676388, + "grad_norm": 1.4512524044419246, + "learning_rate": 7.561318505461956e-07, + "loss": 0.30288150906562805, + "step": 3570 + }, + { + "epoch": 1.764611392561473, + "grad_norm": 1.3464088406966324, + "learning_rate": 7.530182114558582e-07, + "loss": 0.25749915838241577, + "step": 3571 + }, + { + "epoch": 1.765105646855307, + "grad_norm": 1.4850779133681176, + "learning_rate": 7.499107455184351e-07, + "loss": 0.23799163103103638, + "step": 3572 + }, + { + "epoch": 1.7655999011491412, + "grad_norm": 1.2970926183891958, + "learning_rate": 7.46809454808436e-07, + "loss": 0.2626670002937317, + "step": 3573 + }, + { + "epoch": 1.7660941554429754, + "grad_norm": 1.4394447645143165, + "learning_rate": 
7.437143413962299e-07, + "loss": 0.23273026943206787, + "step": 3574 + }, + { + "epoch": 1.7665884097368096, + "grad_norm": 1.329151714167698, + "learning_rate": 7.406254073480735e-07, + "loss": 0.22592151165008545, + "step": 3575 + }, + { + "epoch": 1.7670826640306436, + "grad_norm": 1.4000212660765223, + "learning_rate": 7.375426547260944e-07, + "loss": 0.2594859004020691, + "step": 3576 + }, + { + "epoch": 1.7675769183244778, + "grad_norm": 1.2114788921542652, + "learning_rate": 7.344660855882946e-07, + "loss": 0.2161571979522705, + "step": 3577 + }, + { + "epoch": 1.768071172618312, + "grad_norm": 1.2669666342048183, + "learning_rate": 7.313957019885487e-07, + "loss": 0.23052990436553955, + "step": 3578 + }, + { + "epoch": 1.7685654269121462, + "grad_norm": 1.2921856609362714, + "learning_rate": 7.283315059766005e-07, + "loss": 0.2309163510799408, + "step": 3579 + }, + { + "epoch": 1.7690596812059804, + "grad_norm": 1.3800150012724666, + "learning_rate": 7.252734995980604e-07, + "loss": 0.24543863534927368, + "step": 3580 + }, + { + "epoch": 1.7695539354998147, + "grad_norm": 1.315509052214176, + "learning_rate": 7.22221684894413e-07, + "loss": 0.27616050839424133, + "step": 3581 + }, + { + "epoch": 1.7700481897936489, + "grad_norm": 1.5849292816622715, + "learning_rate": 7.191760639030077e-07, + "loss": 0.2247719019651413, + "step": 3582 + }, + { + "epoch": 1.770542444087483, + "grad_norm": 1.3600242028973613, + "learning_rate": 7.161366386570545e-07, + "loss": 0.28721702098846436, + "step": 3583 + }, + { + "epoch": 1.7710366983813173, + "grad_norm": 1.3444976293289765, + "learning_rate": 7.131034111856294e-07, + "loss": 0.24191290140151978, + "step": 3584 + }, + { + "epoch": 1.7715309526751515, + "grad_norm": 1.3549546462173616, + "learning_rate": 7.100763835136748e-07, + "loss": 0.24049970507621765, + "step": 3585 + }, + { + "epoch": 1.7720252069689857, + "grad_norm": 1.4855378384649431, + "learning_rate": 7.070555576619887e-07, + "loss": 0.255404531955719, 
+ "step": 3586 + }, + { + "epoch": 1.7725194612628197, + "grad_norm": 1.3672964019576628, + "learning_rate": 7.040409356472333e-07, + "loss": 0.23041129112243652, + "step": 3587 + }, + { + "epoch": 1.7730137155566539, + "grad_norm": 1.3790812567511086, + "learning_rate": 7.010325194819278e-07, + "loss": 0.2589847147464752, + "step": 3588 + }, + { + "epoch": 1.773507969850488, + "grad_norm": 1.4114272066031652, + "learning_rate": 6.980303111744424e-07, + "loss": 0.2604563236236572, + "step": 3589 + }, + { + "epoch": 1.7740022241443223, + "grad_norm": 1.3786249354000182, + "learning_rate": 6.950343127290138e-07, + "loss": 0.26831385493278503, + "step": 3590 + }, + { + "epoch": 1.7744964784381563, + "grad_norm": 1.3398044201914234, + "learning_rate": 6.920445261457276e-07, + "loss": 0.20475032925605774, + "step": 3591 + }, + { + "epoch": 1.7749907327319905, + "grad_norm": 1.669693479578031, + "learning_rate": 6.890609534205206e-07, + "loss": 0.32378682494163513, + "step": 3592 + }, + { + "epoch": 1.7754849870258247, + "grad_norm": 1.4433175991642826, + "learning_rate": 6.86083596545184e-07, + "loss": 0.2526070177555084, + "step": 3593 + }, + { + "epoch": 1.775979241319659, + "grad_norm": 1.3738645357999373, + "learning_rate": 6.831124575073578e-07, + "loss": 0.2467537820339203, + "step": 3594 + }, + { + "epoch": 1.776473495613493, + "grad_norm": 1.4660741149631984, + "learning_rate": 6.801475382905332e-07, + "loss": 0.2857215404510498, + "step": 3595 + }, + { + "epoch": 1.7769677499073273, + "grad_norm": 1.4443968381596262, + "learning_rate": 6.771888408740479e-07, + "loss": 0.23615087568759918, + "step": 3596 + }, + { + "epoch": 1.7774620042011615, + "grad_norm": 1.451390021672748, + "learning_rate": 6.742363672330854e-07, + "loss": 0.2613365054130554, + "step": 3597 + }, + { + "epoch": 1.7779562584949957, + "grad_norm": 1.465141872886975, + "learning_rate": 6.712901193386756e-07, + "loss": 0.2558417320251465, + "step": 3598 + }, + { + "epoch": 1.77845051278883, + 
"grad_norm": 1.4467371641088191, + "learning_rate": 6.683500991576919e-07, + "loss": 0.2683117091655731, + "step": 3599 + }, + { + "epoch": 1.7789447670826641, + "grad_norm": 1.4625204738144366, + "learning_rate": 6.654163086528487e-07, + "loss": 0.2546064555644989, + "step": 3600 + }, + { + "epoch": 1.7794390213764983, + "grad_norm": 1.5872307428555623, + "learning_rate": 6.624887497827004e-07, + "loss": 0.2683906555175781, + "step": 3601 + }, + { + "epoch": 1.7799332756703325, + "grad_norm": 1.363900663564542, + "learning_rate": 6.595674245016492e-07, + "loss": 0.23260846734046936, + "step": 3602 + }, + { + "epoch": 1.7804275299641665, + "grad_norm": 1.3840728964244504, + "learning_rate": 6.566523347599252e-07, + "loss": 0.22884608805179596, + "step": 3603 + }, + { + "epoch": 1.7809217842580007, + "grad_norm": 1.3583647776279095, + "learning_rate": 6.537434825036027e-07, + "loss": 0.24236485362052917, + "step": 3604 + }, + { + "epoch": 1.781416038551835, + "grad_norm": 1.4869775379128283, + "learning_rate": 6.508408696745893e-07, + "loss": 0.29543957114219666, + "step": 3605 + }, + { + "epoch": 1.781910292845669, + "grad_norm": 1.3626399619539873, + "learning_rate": 6.479444982106276e-07, + "loss": 0.24011383950710297, + "step": 3606 + }, + { + "epoch": 1.7824045471395031, + "grad_norm": 1.3135116984072812, + "learning_rate": 6.450543700452949e-07, + "loss": 0.248407244682312, + "step": 3607 + }, + { + "epoch": 1.7828988014333373, + "grad_norm": 1.4089475770026854, + "learning_rate": 6.421704871080004e-07, + "loss": 0.2405746728181839, + "step": 3608 + }, + { + "epoch": 1.7833930557271716, + "grad_norm": 1.2522903384339197, + "learning_rate": 6.392928513239804e-07, + "loss": 0.24601790308952332, + "step": 3609 + }, + { + "epoch": 1.7838873100210058, + "grad_norm": 1.2436557177887422, + "learning_rate": 6.36421464614303e-07, + "loss": 0.20030242204666138, + "step": 3610 + }, + { + "epoch": 1.78438156431484, + "grad_norm": 1.3296983724782687, + "learning_rate": 
6.335563288958691e-07, + "loss": 0.23858311772346497, + "step": 3611 + }, + { + "epoch": 1.7848758186086742, + "grad_norm": 1.4392435044249465, + "learning_rate": 6.306974460813986e-07, + "loss": 0.2330242097377777, + "step": 3612 + }, + { + "epoch": 1.7853700729025084, + "grad_norm": 1.445863340067418, + "learning_rate": 6.278448180794416e-07, + "loss": 0.25513261556625366, + "step": 3613 + }, + { + "epoch": 1.7858643271963426, + "grad_norm": 1.3248647587522469, + "learning_rate": 6.249984467943737e-07, + "loss": 0.2298405021429062, + "step": 3614 + }, + { + "epoch": 1.7863585814901768, + "grad_norm": 1.3090685428520892, + "learning_rate": 6.221583341263893e-07, + "loss": 0.22120623290538788, + "step": 3615 + }, + { + "epoch": 1.786852835784011, + "grad_norm": 1.3392765156774626, + "learning_rate": 6.193244819715072e-07, + "loss": 0.26976969838142395, + "step": 3616 + }, + { + "epoch": 1.7873470900778452, + "grad_norm": 1.3657180436845977, + "learning_rate": 6.164968922215697e-07, + "loss": 0.24354586005210876, + "step": 3617 + }, + { + "epoch": 1.7878413443716792, + "grad_norm": 1.4254233164600292, + "learning_rate": 6.136755667642302e-07, + "loss": 0.2849498689174652, + "step": 3618 + }, + { + "epoch": 1.7883355986655134, + "grad_norm": 1.2708453781613391, + "learning_rate": 6.10860507482971e-07, + "loss": 0.2431584596633911, + "step": 3619 + }, + { + "epoch": 1.7888298529593476, + "grad_norm": 1.5031154285158648, + "learning_rate": 6.080517162570809e-07, + "loss": 0.2384781688451767, + "step": 3620 + }, + { + "epoch": 1.7893241072531818, + "grad_norm": 1.45686854578023, + "learning_rate": 6.052491949616712e-07, + "loss": 0.23782339692115784, + "step": 3621 + }, + { + "epoch": 1.7898183615470158, + "grad_norm": 1.342733882676876, + "learning_rate": 6.024529454676631e-07, + "loss": 0.23293447494506836, + "step": 3622 + }, + { + "epoch": 1.79031261584085, + "grad_norm": 1.2930495337650696, + "learning_rate": 5.996629696417955e-07, + "loss": 0.21202662587165833, + 
"step": 3623 + }, + { + "epoch": 1.7908068701346842, + "grad_norm": 1.5889243123202152, + "learning_rate": 5.968792693466141e-07, + "loss": 0.27971768379211426, + "step": 3624 + }, + { + "epoch": 1.7913011244285184, + "grad_norm": 1.441999540970622, + "learning_rate": 5.94101846440478e-07, + "loss": 0.2433638721704483, + "step": 3625 + }, + { + "epoch": 1.7917953787223526, + "grad_norm": 1.3682285780053611, + "learning_rate": 5.91330702777555e-07, + "loss": 0.21812602877616882, + "step": 3626 + }, + { + "epoch": 1.7922896330161868, + "grad_norm": 1.924541384200403, + "learning_rate": 5.88565840207822e-07, + "loss": 0.2135028839111328, + "step": 3627 + }, + { + "epoch": 1.792783887310021, + "grad_norm": 1.3226125497456243, + "learning_rate": 5.858072605770626e-07, + "loss": 0.23919226229190826, + "step": 3628 + }, + { + "epoch": 1.7932781416038552, + "grad_norm": 1.3008122554752455, + "learning_rate": 5.830549657268614e-07, + "loss": 0.2495008111000061, + "step": 3629 + }, + { + "epoch": 1.7937723958976894, + "grad_norm": 1.4679589100669386, + "learning_rate": 5.80308957494613e-07, + "loss": 0.2531805634498596, + "step": 3630 + }, + { + "epoch": 1.7942666501915236, + "grad_norm": 1.2654762717037664, + "learning_rate": 5.775692377135156e-07, + "loss": 0.22644619643688202, + "step": 3631 + }, + { + "epoch": 1.7947609044853579, + "grad_norm": 1.2567004368149646, + "learning_rate": 5.748358082125638e-07, + "loss": 0.2264411598443985, + "step": 3632 + }, + { + "epoch": 1.7952551587791918, + "grad_norm": 1.3206987713043599, + "learning_rate": 5.721086708165568e-07, + "loss": 0.2663921117782593, + "step": 3633 + }, + { + "epoch": 1.795749413073026, + "grad_norm": 1.35703763331278, + "learning_rate": 5.693878273460951e-07, + "loss": 0.2398051619529724, + "step": 3634 + }, + { + "epoch": 1.7962436673668603, + "grad_norm": 1.4184943078470147, + "learning_rate": 5.6667327961757e-07, + "loss": 0.28781580924987793, + "step": 3635 + }, + { + "epoch": 1.7967379216606945, + 
"grad_norm": 2.1761368991988084, + "learning_rate": 5.639650294431787e-07, + "loss": 0.2232055813074112, + "step": 3636 + }, + { + "epoch": 1.7972321759545284, + "grad_norm": 1.402577073030083, + "learning_rate": 5.612630786309103e-07, + "loss": 0.23214340209960938, + "step": 3637 + }, + { + "epoch": 1.7977264302483627, + "grad_norm": 1.2714718799747338, + "learning_rate": 5.585674289845467e-07, + "loss": 0.21598659455776215, + "step": 3638 + }, + { + "epoch": 1.7982206845421969, + "grad_norm": 1.351029180109128, + "learning_rate": 5.558780823036658e-07, + "loss": 0.2760176956653595, + "step": 3639 + }, + { + "epoch": 1.798714938836031, + "grad_norm": 1.3941723061811673, + "learning_rate": 5.531950403836373e-07, + "loss": 0.2641429901123047, + "step": 3640 + }, + { + "epoch": 1.7992091931298653, + "grad_norm": 1.390874465362023, + "learning_rate": 5.505183050156204e-07, + "loss": 0.2407502382993698, + "step": 3641 + }, + { + "epoch": 1.7997034474236995, + "grad_norm": 1.2164247841450622, + "learning_rate": 5.478478779865682e-07, + "loss": 0.19910940527915955, + "step": 3642 + }, + { + "epoch": 1.8001977017175337, + "grad_norm": 1.4412656091937792, + "learning_rate": 5.451837610792166e-07, + "loss": 0.2716234624385834, + "step": 3643 + }, + { + "epoch": 1.800691956011368, + "grad_norm": 1.3284477963142056, + "learning_rate": 5.42525956072093e-07, + "loss": 0.2784198224544525, + "step": 3644 + }, + { + "epoch": 1.801186210305202, + "grad_norm": 1.3444314874013155, + "learning_rate": 5.398744647395104e-07, + "loss": 0.2277904599905014, + "step": 3645 + }, + { + "epoch": 1.8016804645990363, + "grad_norm": 1.4299842617414134, + "learning_rate": 5.372292888515684e-07, + "loss": 0.26788002252578735, + "step": 3646 + }, + { + "epoch": 1.8021747188928705, + "grad_norm": 1.3607541160674654, + "learning_rate": 5.345904301741445e-07, + "loss": 0.22452175617218018, + "step": 3647 + }, + { + "epoch": 1.8026689731867047, + "grad_norm": 1.44450101040719, + "learning_rate": 
5.319578904689071e-07, + "loss": 0.2337179332971573, + "step": 3648 + }, + { + "epoch": 1.8031632274805387, + "grad_norm": 1.3116281040368842, + "learning_rate": 5.293316714932983e-07, + "loss": 0.2614130973815918, + "step": 3649 + }, + { + "epoch": 1.803657481774373, + "grad_norm": 1.3142722561763884, + "learning_rate": 5.267117750005468e-07, + "loss": 0.2577320635318756, + "step": 3650 + }, + { + "epoch": 1.8041517360682071, + "grad_norm": 1.231846526151871, + "learning_rate": 5.24098202739658e-07, + "loss": 0.2058672308921814, + "step": 3651 + }, + { + "epoch": 1.8046459903620413, + "grad_norm": 1.3970882237865128, + "learning_rate": 5.214909564554138e-07, + "loss": 0.25223514437675476, + "step": 3652 + }, + { + "epoch": 1.8051402446558753, + "grad_norm": 1.3683940041570406, + "learning_rate": 5.188900378883765e-07, + "loss": 0.25651872158050537, + "step": 3653 + }, + { + "epoch": 1.8056344989497095, + "grad_norm": 1.3167902113360206, + "learning_rate": 5.162954487748828e-07, + "loss": 0.257855623960495, + "step": 3654 + }, + { + "epoch": 1.8061287532435437, + "grad_norm": 1.3408137381423195, + "learning_rate": 5.137071908470381e-07, + "loss": 0.22942093014717102, + "step": 3655 + }, + { + "epoch": 1.806623007537378, + "grad_norm": 1.3905585042591802, + "learning_rate": 5.111252658327326e-07, + "loss": 0.25629153847694397, + "step": 3656 + }, + { + "epoch": 1.8071172618312121, + "grad_norm": 1.3417957205977868, + "learning_rate": 5.085496754556207e-07, + "loss": 0.23882299661636353, + "step": 3657 + }, + { + "epoch": 1.8076115161250463, + "grad_norm": 1.3092883951034957, + "learning_rate": 5.059804214351283e-07, + "loss": 0.2323160469532013, + "step": 3658 + }, + { + "epoch": 1.8081057704188805, + "grad_norm": 1.318607555394289, + "learning_rate": 5.034175054864531e-07, + "loss": 0.2080869972705841, + "step": 3659 + }, + { + "epoch": 1.8086000247127147, + "grad_norm": 1.476319660825777, + "learning_rate": 5.008609293205624e-07, + "loss": 0.22439511120319366, + 
"step": 3660 + }, + { + "epoch": 1.809094279006549, + "grad_norm": 1.3639928518895943, + "learning_rate": 4.983106946441885e-07, + "loss": 0.2527809739112854, + "step": 3661 + }, + { + "epoch": 1.8095885333003832, + "grad_norm": 1.181172468164539, + "learning_rate": 4.957668031598328e-07, + "loss": 0.2149294763803482, + "step": 3662 + }, + { + "epoch": 1.8100827875942174, + "grad_norm": 1.3244234520799762, + "learning_rate": 4.932292565657615e-07, + "loss": 0.2471565306186676, + "step": 3663 + }, + { + "epoch": 1.8105770418880514, + "grad_norm": 1.328701941509414, + "learning_rate": 4.906980565560004e-07, + "loss": 0.25820282101631165, + "step": 3664 + }, + { + "epoch": 1.8110712961818856, + "grad_norm": 1.4538113944792308, + "learning_rate": 4.881732048203469e-07, + "loss": 0.2815645933151245, + "step": 3665 + }, + { + "epoch": 1.8115655504757198, + "grad_norm": 1.4078938194960222, + "learning_rate": 4.856547030443559e-07, + "loss": 0.23443330824375153, + "step": 3666 + }, + { + "epoch": 1.812059804769554, + "grad_norm": 1.413689966723704, + "learning_rate": 4.831425529093403e-07, + "loss": 0.2452373206615448, + "step": 3667 + }, + { + "epoch": 1.812554059063388, + "grad_norm": 1.2405057526282826, + "learning_rate": 4.806367560923764e-07, + "loss": 0.21815839409828186, + "step": 3668 + }, + { + "epoch": 1.8130483133572222, + "grad_norm": 1.3418751770168684, + "learning_rate": 4.781373142663003e-07, + "loss": 0.23436316847801208, + "step": 3669 + }, + { + "epoch": 1.8135425676510564, + "grad_norm": 1.277189547676361, + "learning_rate": 4.75644229099701e-07, + "loss": 0.18917132914066315, + "step": 3670 + }, + { + "epoch": 1.8140368219448906, + "grad_norm": 1.3842801505047626, + "learning_rate": 4.7315750225692905e-07, + "loss": 0.24570351839065552, + "step": 3671 + }, + { + "epoch": 1.8145310762387248, + "grad_norm": 1.2514343072057177, + "learning_rate": 4.7067713539808543e-07, + "loss": 0.23367956280708313, + "step": 3672 + }, + { + "epoch": 1.815025330532559, + 
"grad_norm": 1.372723501995688, + "learning_rate": 4.682031301790291e-07, + "loss": 0.24563322961330414, + "step": 3673 + }, + { + "epoch": 1.8155195848263932, + "grad_norm": 1.3552399849082646, + "learning_rate": 4.6573548825137204e-07, + "loss": 0.2425815761089325, + "step": 3674 + }, + { + "epoch": 1.8160138391202274, + "grad_norm": 1.2732667032266225, + "learning_rate": 4.632742112624744e-07, + "loss": 0.2173803597688675, + "step": 3675 + }, + { + "epoch": 1.8165080934140616, + "grad_norm": 1.4674070434763509, + "learning_rate": 4.6081930085544734e-07, + "loss": 0.2665477395057678, + "step": 3676 + }, + { + "epoch": 1.8170023477078958, + "grad_norm": 1.2335396057121188, + "learning_rate": 4.5837075866915994e-07, + "loss": 0.23834756016731262, + "step": 3677 + }, + { + "epoch": 1.81749660200173, + "grad_norm": 1.3614176095599289, + "learning_rate": 4.55928586338219e-07, + "loss": 0.2479294240474701, + "step": 3678 + }, + { + "epoch": 1.8179908562955642, + "grad_norm": 1.370567608566195, + "learning_rate": 4.5349278549298716e-07, + "loss": 0.24136531352996826, + "step": 3679 + }, + { + "epoch": 1.8184851105893982, + "grad_norm": 1.3881148070094378, + "learning_rate": 4.510633577595669e-07, + "loss": 0.24397623538970947, + "step": 3680 + }, + { + "epoch": 1.8189793648832324, + "grad_norm": 1.3189259944629108, + "learning_rate": 4.48640304759812e-07, + "loss": 0.27078694105148315, + "step": 3681 + }, + { + "epoch": 1.8194736191770666, + "grad_norm": 1.5222352072420349, + "learning_rate": 4.4622362811131745e-07, + "loss": 0.2544251084327698, + "step": 3682 + }, + { + "epoch": 1.8199678734709008, + "grad_norm": 1.3696668102162666, + "learning_rate": 4.4381332942742384e-07, + "loss": 0.2528873682022095, + "step": 3683 + }, + { + "epoch": 1.8204621277647348, + "grad_norm": 1.470119432024013, + "learning_rate": 4.414094103172084e-07, + "loss": 0.25487592816352844, + "step": 3684 + }, + { + "epoch": 1.820956382058569, + "grad_norm": 1.3872878168023053, + "learning_rate": 
4.3901187238549414e-07, + "loss": 0.22061187028884888, + "step": 3685 + }, + { + "epoch": 1.8214506363524032, + "grad_norm": 1.355863796177502, + "learning_rate": 4.366207172328452e-07, + "loss": 0.2793615758419037, + "step": 3686 + }, + { + "epoch": 1.8219448906462374, + "grad_norm": 1.2429295933181803, + "learning_rate": 4.342359464555612e-07, + "loss": 0.2323140949010849, + "step": 3687 + }, + { + "epoch": 1.8224391449400716, + "grad_norm": 1.370663497944958, + "learning_rate": 4.3185756164568104e-07, + "loss": 0.2616409659385681, + "step": 3688 + }, + { + "epoch": 1.8229333992339058, + "grad_norm": 1.3843956978002738, + "learning_rate": 4.294855643909812e-07, + "loss": 0.203874871134758, + "step": 3689 + }, + { + "epoch": 1.82342765352774, + "grad_norm": 1.2289114807067458, + "learning_rate": 4.271199562749717e-07, + "loss": 0.2272878736257553, + "step": 3690 + }, + { + "epoch": 1.8239219078215743, + "grad_norm": 1.338434972419624, + "learning_rate": 4.247607388769004e-07, + "loss": 0.23728047311306, + "step": 3691 + }, + { + "epoch": 1.8244161621154085, + "grad_norm": 1.4750745226923418, + "learning_rate": 4.2240791377174737e-07, + "loss": 0.2570911943912506, + "step": 3692 + }, + { + "epoch": 1.8249104164092427, + "grad_norm": 1.4969254471055817, + "learning_rate": 4.200614825302207e-07, + "loss": 0.24265727400779724, + "step": 3693 + }, + { + "epoch": 1.8254046707030769, + "grad_norm": 1.405819385173928, + "learning_rate": 4.177214467187707e-07, + "loss": 0.24822816252708435, + "step": 3694 + }, + { + "epoch": 1.8258989249969109, + "grad_norm": 1.3218266218091017, + "learning_rate": 4.153878078995677e-07, + "loss": 0.23382046818733215, + "step": 3695 + }, + { + "epoch": 1.826393179290745, + "grad_norm": 1.4037010093048616, + "learning_rate": 4.130605676305166e-07, + "loss": 0.27590304613113403, + "step": 3696 + }, + { + "epoch": 1.8268874335845793, + "grad_norm": 1.4161501438852775, + "learning_rate": 4.1073972746525026e-07, + "loss": 0.25702038407325745, + 
"step": 3697 + }, + { + "epoch": 1.8273816878784135, + "grad_norm": 1.488627338365754, + "learning_rate": 4.0842528895312707e-07, + "loss": 0.28980135917663574, + "step": 3698 + }, + { + "epoch": 1.8278759421722475, + "grad_norm": 1.5075437506896323, + "learning_rate": 4.0611725363923435e-07, + "loss": 0.22739271819591522, + "step": 3699 + }, + { + "epoch": 1.8283701964660817, + "grad_norm": 1.4671495030162094, + "learning_rate": 4.038156230643853e-07, + "loss": 0.26396334171295166, + "step": 3700 + }, + { + "epoch": 1.8288644507599159, + "grad_norm": 1.5855861974203058, + "learning_rate": 4.015203987651106e-07, + "loss": 0.25548964738845825, + "step": 3701 + }, + { + "epoch": 1.82935870505375, + "grad_norm": 1.3315259515817186, + "learning_rate": 3.992315822736725e-07, + "loss": 0.22227105498313904, + "step": 3702 + }, + { + "epoch": 1.8298529593475843, + "grad_norm": 1.445413897274288, + "learning_rate": 3.969491751180543e-07, + "loss": 0.30854254961013794, + "step": 3703 + }, + { + "epoch": 1.8303472136414185, + "grad_norm": 1.4678349464130562, + "learning_rate": 3.946731788219538e-07, + "loss": 0.27471429109573364, + "step": 3704 + }, + { + "epoch": 1.8308414679352527, + "grad_norm": 1.334822235698922, + "learning_rate": 3.924035949047955e-07, + "loss": 0.2317768633365631, + "step": 3705 + }, + { + "epoch": 1.831335722229087, + "grad_norm": 1.4197098897896443, + "learning_rate": 3.901404248817231e-07, + "loss": 0.2450723946094513, + "step": 3706 + }, + { + "epoch": 1.8318299765229211, + "grad_norm": 1.4676009490842072, + "learning_rate": 3.878836702635935e-07, + "loss": 0.2428039014339447, + "step": 3707 + }, + { + "epoch": 1.8323242308167553, + "grad_norm": 1.4376208196933993, + "learning_rate": 3.856333325569861e-07, + "loss": 0.27869629859924316, + "step": 3708 + }, + { + "epoch": 1.8328184851105895, + "grad_norm": 1.2808253694997749, + "learning_rate": 3.8338941326419353e-07, + "loss": 0.21661749482154846, + "step": 3709 + }, + { + "epoch": 
1.8333127394044237, + "grad_norm": 1.3452610575891626, + "learning_rate": 3.8115191388322206e-07, + "loss": 0.2655249834060669, + "step": 3710 + }, + { + "epoch": 1.8338069936982577, + "grad_norm": 1.3643896556477109, + "learning_rate": 3.7892083590779784e-07, + "loss": 0.2281903475522995, + "step": 3711 + }, + { + "epoch": 1.834301247992092, + "grad_norm": 1.492937654145658, + "learning_rate": 3.7669618082735504e-07, + "loss": 0.24545446038246155, + "step": 3712 + }, + { + "epoch": 1.8347955022859261, + "grad_norm": 1.2788794377367898, + "learning_rate": 3.7447795012704237e-07, + "loss": 0.24749556183815002, + "step": 3713 + }, + { + "epoch": 1.8352897565797601, + "grad_norm": 1.4606135919595513, + "learning_rate": 3.722661452877163e-07, + "loss": 0.26234689354896545, + "step": 3714 + }, + { + "epoch": 1.8357840108735943, + "grad_norm": 1.3697239858165842, + "learning_rate": 3.700607677859491e-07, + "loss": 0.21348389983177185, + "step": 3715 + }, + { + "epoch": 1.8362782651674285, + "grad_norm": 1.3198403259649356, + "learning_rate": 3.6786181909401864e-07, + "loss": 0.2527744770050049, + "step": 3716 + }, + { + "epoch": 1.8367725194612627, + "grad_norm": 1.3153305717810528, + "learning_rate": 3.6566930067991056e-07, + "loss": 0.2175026535987854, + "step": 3717 + }, + { + "epoch": 1.837266773755097, + "grad_norm": 1.3795015677920492, + "learning_rate": 3.6348321400731967e-07, + "loss": 0.2847272753715515, + "step": 3718 + }, + { + "epoch": 1.8377610280489312, + "grad_norm": 1.4885049894439106, + "learning_rate": 3.613035605356463e-07, + "loss": 0.2549072504043579, + "step": 3719 + }, + { + "epoch": 1.8382552823427654, + "grad_norm": 1.3444222427486383, + "learning_rate": 3.591303417199965e-07, + "loss": 0.24534013867378235, + "step": 3720 + }, + { + "epoch": 1.8387495366365996, + "grad_norm": 1.461602538702394, + "learning_rate": 3.5696355901117865e-07, + "loss": 0.25336408615112305, + "step": 3721 + }, + { + "epoch": 1.8392437909304338, + "grad_norm": 
1.4932038589381658, + "learning_rate": 3.548032138557056e-07, + "loss": 0.2787632346153259, + "step": 3722 + }, + { + "epoch": 1.839738045224268, + "grad_norm": 1.3687827308256, + "learning_rate": 3.5264930769579595e-07, + "loss": 0.22364875674247742, + "step": 3723 + }, + { + "epoch": 1.8402322995181022, + "grad_norm": 1.509493433022075, + "learning_rate": 3.5050184196936285e-07, + "loss": 0.2526230216026306, + "step": 3724 + }, + { + "epoch": 1.8407265538119364, + "grad_norm": 1.449998297788816, + "learning_rate": 3.483608181100262e-07, + "loss": 0.2412932962179184, + "step": 3725 + }, + { + "epoch": 1.8412208081057704, + "grad_norm": 1.4100243345912178, + "learning_rate": 3.462262375471026e-07, + "loss": 0.28693705797195435, + "step": 3726 + }, + { + "epoch": 1.8417150623996046, + "grad_norm": 1.4369299703462226, + "learning_rate": 3.4409810170560667e-07, + "loss": 0.2600281834602356, + "step": 3727 + }, + { + "epoch": 1.8422093166934388, + "grad_norm": 1.3702328145360616, + "learning_rate": 3.4197641200625185e-07, + "loss": 0.24885150790214539, + "step": 3728 + }, + { + "epoch": 1.842703570987273, + "grad_norm": 1.476451776245579, + "learning_rate": 3.398611698654497e-07, + "loss": 0.27185115218162537, + "step": 3729 + }, + { + "epoch": 1.843197825281107, + "grad_norm": 1.6779196665373166, + "learning_rate": 3.377523766953006e-07, + "loss": 0.2999323010444641, + "step": 3730 + }, + { + "epoch": 1.8436920795749412, + "grad_norm": 1.3755033406487114, + "learning_rate": 3.356500339036106e-07, + "loss": 0.22807806730270386, + "step": 3731 + }, + { + "epoch": 1.8441863338687754, + "grad_norm": 1.4727836521575108, + "learning_rate": 3.3355414289387155e-07, + "loss": 0.23006726801395416, + "step": 3732 + }, + { + "epoch": 1.8446805881626096, + "grad_norm": 1.4892072813513704, + "learning_rate": 3.314647050652686e-07, + "loss": 0.25261276960372925, + "step": 3733 + }, + { + "epoch": 1.8451748424564438, + "grad_norm": 1.3741598151970273, + "learning_rate": 
3.293817218126827e-07, + "loss": 0.2484148144721985, + "step": 3734 + }, + { + "epoch": 1.845669096750278, + "grad_norm": 1.2679669997107472, + "learning_rate": 3.273051945266836e-07, + "loss": 0.2472834438085556, + "step": 3735 + }, + { + "epoch": 1.8461633510441122, + "grad_norm": 1.16756829401485, + "learning_rate": 3.2523512459352923e-07, + "loss": 0.20510706305503845, + "step": 3736 + }, + { + "epoch": 1.8466576053379464, + "grad_norm": 1.292644423038628, + "learning_rate": 3.231715133951707e-07, + "loss": 0.2331993281841278, + "step": 3737 + }, + { + "epoch": 1.8471518596317806, + "grad_norm": 1.4584815860954135, + "learning_rate": 3.211143623092461e-07, + "loss": 0.2704228162765503, + "step": 3738 + }, + { + "epoch": 1.8476461139256148, + "grad_norm": 1.4579018041488718, + "learning_rate": 3.190636727090768e-07, + "loss": 0.2514714002609253, + "step": 3739 + }, + { + "epoch": 1.848140368219449, + "grad_norm": 1.258977256920419, + "learning_rate": 3.170194459636777e-07, + "loss": 0.2396089732646942, + "step": 3740 + }, + { + "epoch": 1.848634622513283, + "grad_norm": 1.4139144003983488, + "learning_rate": 3.149816834377428e-07, + "loss": 0.266484797000885, + "step": 3741 + }, + { + "epoch": 1.8491288768071172, + "grad_norm": 1.338105672337281, + "learning_rate": 3.129503864916539e-07, + "loss": 0.24549749493598938, + "step": 3742 + }, + { + "epoch": 1.8496231311009514, + "grad_norm": 1.6902480251834826, + "learning_rate": 3.1092555648147615e-07, + "loss": 0.2659090757369995, + "step": 3743 + }, + { + "epoch": 1.8501173853947857, + "grad_norm": 1.4018081288366548, + "learning_rate": 3.0890719475895615e-07, + "loss": 0.2756732702255249, + "step": 3744 + }, + { + "epoch": 1.8506116396886196, + "grad_norm": 1.3509953718874834, + "learning_rate": 3.068953026715238e-07, + "loss": 0.2568710148334503, + "step": 3745 + }, + { + "epoch": 1.8511058939824538, + "grad_norm": 1.3512798325752944, + "learning_rate": 3.048898815622914e-07, + "loss": 0.2255566120147705, + 
"step": 3746 + }, + { + "epoch": 1.851600148276288, + "grad_norm": 1.309385732750396, + "learning_rate": 3.028909327700458e-07, + "loss": 0.2083941102027893, + "step": 3747 + }, + { + "epoch": 1.8520944025701223, + "grad_norm": 1.2287507621351796, + "learning_rate": 3.0089845762926063e-07, + "loss": 0.20739290118217468, + "step": 3748 + }, + { + "epoch": 1.8525886568639565, + "grad_norm": 1.2356251229389228, + "learning_rate": 2.989124574700819e-07, + "loss": 0.21835210919380188, + "step": 3749 + }, + { + "epoch": 1.8530829111577907, + "grad_norm": 1.312598409351232, + "learning_rate": 2.969329336183335e-07, + "loss": 0.2170596569776535, + "step": 3750 + }, + { + "epoch": 1.8535771654516249, + "grad_norm": 1.3990932569701935, + "learning_rate": 2.949598873955184e-07, + "loss": 0.23584111034870148, + "step": 3751 + }, + { + "epoch": 1.854071419745459, + "grad_norm": 1.5531646127161125, + "learning_rate": 2.9299332011881623e-07, + "loss": 0.2690342664718628, + "step": 3752 + }, + { + "epoch": 1.8545656740392933, + "grad_norm": 1.2634424740078676, + "learning_rate": 2.9103323310107566e-07, + "loss": 0.2499091923236847, + "step": 3753 + }, + { + "epoch": 1.8550599283331275, + "grad_norm": 1.417744173198578, + "learning_rate": 2.8907962765082567e-07, + "loss": 0.23112377524375916, + "step": 3754 + }, + { + "epoch": 1.8555541826269617, + "grad_norm": 1.375590332914505, + "learning_rate": 2.8713250507226285e-07, + "loss": 0.25203657150268555, + "step": 3755 + }, + { + "epoch": 1.856048436920796, + "grad_norm": 1.4015552448571456, + "learning_rate": 2.8519186666526086e-07, + "loss": 0.2468508780002594, + "step": 3756 + }, + { + "epoch": 1.85654269121463, + "grad_norm": 1.427563584784084, + "learning_rate": 2.8325771372536e-07, + "loss": 0.22745928168296814, + "step": 3757 + }, + { + "epoch": 1.857036945508464, + "grad_norm": 1.2932963376428803, + "learning_rate": 2.8133004754377525e-07, + "loss": 0.23090660572052002, + "step": 3758 + }, + { + "epoch": 1.8575311998022983, + 
"grad_norm": 1.420318152152914, + "learning_rate": 2.7940886940738707e-07, + "loss": 0.27513352036476135, + "step": 3759 + }, + { + "epoch": 1.8580254540961325, + "grad_norm": 1.4517333399175874, + "learning_rate": 2.774941805987474e-07, + "loss": 0.25791019201278687, + "step": 3760 + }, + { + "epoch": 1.8585197083899665, + "grad_norm": 1.523404531013776, + "learning_rate": 2.75585982396076e-07, + "loss": 0.2703961730003357, + "step": 3761 + }, + { + "epoch": 1.8590139626838007, + "grad_norm": 1.4198437134006967, + "learning_rate": 2.736842760732561e-07, + "loss": 0.2557608485221863, + "step": 3762 + }, + { + "epoch": 1.859508216977635, + "grad_norm": 1.4276231211370918, + "learning_rate": 2.717890628998421e-07, + "loss": 0.26276740431785583, + "step": 3763 + }, + { + "epoch": 1.8600024712714691, + "grad_norm": 1.3830597360775128, + "learning_rate": 2.699003441410508e-07, + "loss": 0.3033446967601776, + "step": 3764 + }, + { + "epoch": 1.8604967255653033, + "grad_norm": 1.3975518004533982, + "learning_rate": 2.680181210577637e-07, + "loss": 0.2513597905635834, + "step": 3765 + }, + { + "epoch": 1.8609909798591375, + "grad_norm": 1.2527716887935596, + "learning_rate": 2.661423949065267e-07, + "loss": 0.22935059666633606, + "step": 3766 + }, + { + "epoch": 1.8614852341529717, + "grad_norm": 1.5028347517247218, + "learning_rate": 2.6427316693954596e-07, + "loss": 0.2585369348526001, + "step": 3767 + }, + { + "epoch": 1.861979488446806, + "grad_norm": 1.4129565265857094, + "learning_rate": 2.6241043840469104e-07, + "loss": 0.25701645016670227, + "step": 3768 + }, + { + "epoch": 1.8624737427406401, + "grad_norm": 1.304405538262163, + "learning_rate": 2.605542105454961e-07, + "loss": 0.24622182548046112, + "step": 3769 + }, + { + "epoch": 1.8629679970344744, + "grad_norm": 1.380891732165765, + "learning_rate": 2.5870448460114994e-07, + "loss": 0.2650758624076843, + "step": 3770 + }, + { + "epoch": 1.8634622513283086, + "grad_norm": 1.4721649336836553, + "learning_rate": 
2.568612618065036e-07, + "loss": 0.2364269644021988, + "step": 3771 + }, + { + "epoch": 1.8639565056221425, + "grad_norm": 1.2217358212004363, + "learning_rate": 2.5502454339206617e-07, + "loss": 0.23226915299892426, + "step": 3772 + }, + { + "epoch": 1.8644507599159768, + "grad_norm": 1.3407554644381927, + "learning_rate": 2.5319433058400565e-07, + "loss": 0.23077306151390076, + "step": 3773 + }, + { + "epoch": 1.864945014209811, + "grad_norm": 1.289395146095016, + "learning_rate": 2.5137062460414476e-07, + "loss": 0.23707103729248047, + "step": 3774 + }, + { + "epoch": 1.8654392685036452, + "grad_norm": 1.3571808886592325, + "learning_rate": 2.4955342666996505e-07, + "loss": 0.268571138381958, + "step": 3775 + }, + { + "epoch": 1.8659335227974791, + "grad_norm": 1.4298616373621023, + "learning_rate": 2.4774273799459847e-07, + "loss": 0.21469517052173615, + "step": 3776 + }, + { + "epoch": 1.8664277770913134, + "grad_norm": 1.302386517113681, + "learning_rate": 2.45938559786838e-07, + "loss": 0.2513999938964844, + "step": 3777 + }, + { + "epoch": 1.8669220313851476, + "grad_norm": 1.2688339559395354, + "learning_rate": 2.44140893251128e-07, + "loss": 0.23660680651664734, + "step": 3778 + }, + { + "epoch": 1.8674162856789818, + "grad_norm": 1.499995655954345, + "learning_rate": 2.423497395875618e-07, + "loss": 0.24594557285308838, + "step": 3779 + }, + { + "epoch": 1.867910539972816, + "grad_norm": 1.4315211319459857, + "learning_rate": 2.405650999918896e-07, + "loss": 0.2725435793399811, + "step": 3780 + }, + { + "epoch": 1.8684047942666502, + "grad_norm": 1.3565937935517103, + "learning_rate": 2.3878697565551167e-07, + "loss": 0.25718316435813904, + "step": 3781 + }, + { + "epoch": 1.8688990485604844, + "grad_norm": 1.3523272274009415, + "learning_rate": 2.3701536776547851e-07, + "loss": 0.2546181082725525, + "step": 3782 + }, + { + "epoch": 1.8693933028543186, + "grad_norm": 1.1875597307843324, + "learning_rate": 2.3525027750448959e-07, + "loss": 
0.22146770358085632, + "step": 3783 + }, + { + "epoch": 1.8698875571481528, + "grad_norm": 1.5616036933474096, + "learning_rate": 2.3349170605089456e-07, + "loss": 0.23873519897460938, + "step": 3784 + }, + { + "epoch": 1.870381811441987, + "grad_norm": 1.3056198220614723, + "learning_rate": 2.3173965457868875e-07, + "loss": 0.2530808746814728, + "step": 3785 + }, + { + "epoch": 1.8708760657358212, + "grad_norm": 1.5174642956273923, + "learning_rate": 2.2999412425751987e-07, + "loss": 0.21616236865520477, + "step": 3786 + }, + { + "epoch": 1.8713703200296554, + "grad_norm": 1.3867713509711206, + "learning_rate": 2.2825511625267583e-07, + "loss": 0.21596969664096832, + "step": 3787 + }, + { + "epoch": 1.8718645743234894, + "grad_norm": 1.4557650561795843, + "learning_rate": 2.265226317250957e-07, + "loss": 0.25873616337776184, + "step": 3788 + }, + { + "epoch": 1.8723588286173236, + "grad_norm": 1.3108065941801126, + "learning_rate": 2.247966718313599e-07, + "loss": 0.21096865832805634, + "step": 3789 + }, + { + "epoch": 1.8728530829111578, + "grad_norm": 1.374596799099242, + "learning_rate": 2.230772377236956e-07, + "loss": 0.2159111499786377, + "step": 3790 + }, + { + "epoch": 1.8733473372049918, + "grad_norm": 1.3658642346441578, + "learning_rate": 2.213643305499724e-07, + "loss": 0.2264566719532013, + "step": 3791 + }, + { + "epoch": 1.873841591498826, + "grad_norm": 1.2529368730648867, + "learning_rate": 2.1965795145370338e-07, + "loss": 0.216034397482872, + "step": 3792 + }, + { + "epoch": 1.8743358457926602, + "grad_norm": 1.2144868387665828, + "learning_rate": 2.1795810157404063e-07, + "loss": 0.22257745265960693, + "step": 3793 + }, + { + "epoch": 1.8748301000864944, + "grad_norm": 1.5075158608293073, + "learning_rate": 2.1626478204578082e-07, + "loss": 0.2569161653518677, + "step": 3794 + }, + { + "epoch": 1.8753243543803286, + "grad_norm": 1.3028902539101006, + "learning_rate": 2.1457799399936087e-07, + "loss": 0.24172556400299072, + "step": 3795 + }, + { 
+ "epoch": 1.8758186086741628, + "grad_norm": 1.4100197142967315, + "learning_rate": 2.128977385608555e-07, + "loss": 0.25539106130599976, + "step": 3796 + }, + { + "epoch": 1.876312862967997, + "grad_norm": 1.3564195764364628, + "learning_rate": 2.1122401685197747e-07, + "loss": 0.23766650259494781, + "step": 3797 + }, + { + "epoch": 1.8768071172618312, + "grad_norm": 2.0847437292387516, + "learning_rate": 2.095568299900841e-07, + "loss": 0.24102288484573364, + "step": 3798 + }, + { + "epoch": 1.8773013715556655, + "grad_norm": 1.4163898812472968, + "learning_rate": 2.0789617908816063e-07, + "loss": 0.25168395042419434, + "step": 3799 + }, + { + "epoch": 1.8777956258494997, + "grad_norm": 1.2853968722580162, + "learning_rate": 2.0624206525483582e-07, + "loss": 0.23417149484157562, + "step": 3800 + }, + { + "epoch": 1.8782898801433339, + "grad_norm": 1.4002834822702614, + "learning_rate": 2.04594489594373e-07, + "loss": 0.2875264883041382, + "step": 3801 + }, + { + "epoch": 1.878784134437168, + "grad_norm": 1.3714454637927955, + "learning_rate": 2.0295345320667014e-07, + "loss": 0.24828693270683289, + "step": 3802 + }, + { + "epoch": 1.879278388731002, + "grad_norm": 1.3521250596424406, + "learning_rate": 2.013189571872587e-07, + "loss": 0.23279064893722534, + "step": 3803 + }, + { + "epoch": 1.8797726430248363, + "grad_norm": 1.1425181629308492, + "learning_rate": 1.996910026273058e-07, + "loss": 0.2099420577287674, + "step": 3804 + }, + { + "epoch": 1.8802668973186705, + "grad_norm": 1.346362344532125, + "learning_rate": 1.9806959061360985e-07, + "loss": 0.25043174624443054, + "step": 3805 + }, + { + "epoch": 1.8807611516125047, + "grad_norm": 1.3680517059526944, + "learning_rate": 1.9645472222860286e-07, + "loss": 0.2606011927127838, + "step": 3806 + }, + { + "epoch": 1.8812554059063387, + "grad_norm": 1.2606250431650987, + "learning_rate": 1.948463985503468e-07, + "loss": 0.22487565875053406, + "step": 3807 + }, + { + "epoch": 1.8817496602001729, + "grad_norm": 
1.6823729371263936, + "learning_rate": 1.9324462065253735e-07, + "loss": 0.29611343145370483, + "step": 3808 + }, + { + "epoch": 1.882243914494007, + "grad_norm": 1.282763458334529, + "learning_rate": 1.9164938960449685e-07, + "loss": 0.2301706224679947, + "step": 3809 + }, + { + "epoch": 1.8827381687878413, + "grad_norm": 1.319243063789466, + "learning_rate": 1.9006070647118015e-07, + "loss": 0.2306794822216034, + "step": 3810 + }, + { + "epoch": 1.8832324230816755, + "grad_norm": 1.4208055299495237, + "learning_rate": 1.884785723131688e-07, + "loss": 0.2588786482810974, + "step": 3811 + }, + { + "epoch": 1.8837266773755097, + "grad_norm": 1.527285475263959, + "learning_rate": 1.8690298818667463e-07, + "loss": 0.2795346677303314, + "step": 3812 + }, + { + "epoch": 1.884220931669344, + "grad_norm": 1.2499989201376016, + "learning_rate": 1.853339551435318e-07, + "loss": 0.2313271164894104, + "step": 3813 + }, + { + "epoch": 1.884715185963178, + "grad_norm": 1.4803115521216077, + "learning_rate": 1.8377147423120467e-07, + "loss": 0.22814632952213287, + "step": 3814 + }, + { + "epoch": 1.8852094402570123, + "grad_norm": 1.3259243101199787, + "learning_rate": 1.822155464927866e-07, + "loss": 0.2605836093425751, + "step": 3815 + }, + { + "epoch": 1.8857036945508465, + "grad_norm": 1.3976508324913761, + "learning_rate": 1.8066617296699007e-07, + "loss": 0.23902952671051025, + "step": 3816 + }, + { + "epoch": 1.8861979488446807, + "grad_norm": 1.290435692515394, + "learning_rate": 1.7912335468815545e-07, + "loss": 0.24895761907100677, + "step": 3817 + }, + { + "epoch": 1.8866922031385147, + "grad_norm": 1.4446135232841222, + "learning_rate": 1.7758709268624664e-07, + "loss": 0.24108648300170898, + "step": 3818 + }, + { + "epoch": 1.887186457432349, + "grad_norm": 1.4071508146495701, + "learning_rate": 1.7605738798684767e-07, + "loss": 0.2600073516368866, + "step": 3819 + }, + { + "epoch": 1.8876807117261831, + "grad_norm": 1.3261487318829528, + "learning_rate": 
1.745342416111706e-07, + "loss": 0.21564190089702606, + "step": 3820 + }, + { + "epoch": 1.8881749660200173, + "grad_norm": 1.4577577895280622, + "learning_rate": 1.7301765457604647e-07, + "loss": 0.24080556631088257, + "step": 3821 + }, + { + "epoch": 1.8886692203138513, + "grad_norm": 1.316642170468449, + "learning_rate": 1.7150762789392316e-07, + "loss": 0.22631056606769562, + "step": 3822 + }, + { + "epoch": 1.8891634746076855, + "grad_norm": 1.4341533325292704, + "learning_rate": 1.7000416257287654e-07, + "loss": 0.26355087757110596, + "step": 3823 + }, + { + "epoch": 1.8896577289015197, + "grad_norm": 1.387410149780388, + "learning_rate": 1.685072596165982e-07, + "loss": 0.248369500041008, + "step": 3824 + }, + { + "epoch": 1.890151983195354, + "grad_norm": 1.4331472853704903, + "learning_rate": 1.670169200243976e-07, + "loss": 0.2789249122142792, + "step": 3825 + }, + { + "epoch": 1.8906462374891881, + "grad_norm": 1.2052406993380367, + "learning_rate": 1.6553314479120453e-07, + "loss": 0.22493675351142883, + "step": 3826 + }, + { + "epoch": 1.8911404917830223, + "grad_norm": 1.2074956449276386, + "learning_rate": 1.6405593490756766e-07, + "loss": 0.21274074912071228, + "step": 3827 + }, + { + "epoch": 1.8916347460768566, + "grad_norm": 1.3986179942656674, + "learning_rate": 1.6258529135964928e-07, + "loss": 0.2591193914413452, + "step": 3828 + }, + { + "epoch": 1.8921290003706908, + "grad_norm": 1.5077061888652343, + "learning_rate": 1.6112121512923075e-07, + "loss": 0.2791387140750885, + "step": 3829 + }, + { + "epoch": 1.892623254664525, + "grad_norm": 1.449596307066075, + "learning_rate": 1.5966370719371015e-07, + "loss": 0.2840545177459717, + "step": 3830 + }, + { + "epoch": 1.8931175089583592, + "grad_norm": 1.538114321399184, + "learning_rate": 1.582127685260948e-07, + "loss": 0.2563555836677551, + "step": 3831 + }, + { + "epoch": 1.8936117632521934, + "grad_norm": 1.2897284655116197, + "learning_rate": 1.5676840009501538e-07, + "loss": 
0.22912704944610596, + "step": 3832 + }, + { + "epoch": 1.8941060175460276, + "grad_norm": 1.3733822665309192, + "learning_rate": 1.5533060286470837e-07, + "loss": 0.25490787625312805, + "step": 3833 + }, + { + "epoch": 1.8946002718398616, + "grad_norm": 1.2282031018618578, + "learning_rate": 1.5389937779502818e-07, + "loss": 0.21826709806919098, + "step": 3834 + }, + { + "epoch": 1.8950945261336958, + "grad_norm": 1.303626845787231, + "learning_rate": 1.524747258414394e-07, + "loss": 0.2292749583721161, + "step": 3835 + }, + { + "epoch": 1.89558878042753, + "grad_norm": 1.3359905611934206, + "learning_rate": 1.5105664795501908e-07, + "loss": 0.24652332067489624, + "step": 3836 + }, + { + "epoch": 1.8960830347213642, + "grad_norm": 1.3777956922677133, + "learning_rate": 1.4964514508245652e-07, + "loss": 0.25154706835746765, + "step": 3837 + }, + { + "epoch": 1.8965772890151982, + "grad_norm": 1.3722697572324272, + "learning_rate": 1.482402181660525e-07, + "loss": 0.2414158582687378, + "step": 3838 + }, + { + "epoch": 1.8970715433090324, + "grad_norm": 1.3960215733148371, + "learning_rate": 1.4684186814371225e-07, + "loss": 0.22421908378601074, + "step": 3839 + }, + { + "epoch": 1.8975657976028666, + "grad_norm": 1.3337706977662172, + "learning_rate": 1.4545009594895687e-07, + "loss": 0.2506029009819031, + "step": 3840 + }, + { + "epoch": 1.8980600518967008, + "grad_norm": 1.239516400526973, + "learning_rate": 1.440649025109142e-07, + "loss": 0.2011726200580597, + "step": 3841 + }, + { + "epoch": 1.898554306190535, + "grad_norm": 1.5242598019660087, + "learning_rate": 1.4268628875431677e-07, + "loss": 0.27702796459198, + "step": 3842 + }, + { + "epoch": 1.8990485604843692, + "grad_norm": 1.236260659855922, + "learning_rate": 1.413142555995095e-07, + "loss": 0.23884715139865875, + "step": 3843 + }, + { + "epoch": 1.8995428147782034, + "grad_norm": 1.2385068593263413, + "learning_rate": 1.3994880396244304e-07, + "loss": 0.2191702425479889, + "step": 3844 + }, + { + 
"epoch": 1.9000370690720376, + "grad_norm": 1.3532676134331167, + "learning_rate": 1.385899347546704e-07, + "loss": 0.25425833463668823, + "step": 3845 + }, + { + "epoch": 1.9005313233658718, + "grad_norm": 1.3452712776781028, + "learning_rate": 1.37237648883356e-07, + "loss": 0.23355990648269653, + "step": 3846 + }, + { + "epoch": 1.901025577659706, + "grad_norm": 1.200878562022238, + "learning_rate": 1.3589194725126542e-07, + "loss": 0.2079685628414154, + "step": 3847 + }, + { + "epoch": 1.9015198319535402, + "grad_norm": 1.380798956497921, + "learning_rate": 1.3455283075676895e-07, + "loss": 0.25126928091049194, + "step": 3848 + }, + { + "epoch": 1.9020140862473742, + "grad_norm": 1.3306751541769635, + "learning_rate": 1.332203002938437e-07, + "loss": 0.2608864903450012, + "step": 3849 + }, + { + "epoch": 1.9025083405412084, + "grad_norm": 1.3536846944777874, + "learning_rate": 1.3189435675206697e-07, + "loss": 0.27048414945602417, + "step": 3850 + }, + { + "epoch": 1.9030025948350426, + "grad_norm": 1.3873264194773522, + "learning_rate": 1.3057500101661846e-07, + "loss": 0.24350577592849731, + "step": 3851 + }, + { + "epoch": 1.9034968491288768, + "grad_norm": 1.5060374095399143, + "learning_rate": 1.2926223396828363e-07, + "loss": 0.23283880949020386, + "step": 3852 + }, + { + "epoch": 1.9039911034227108, + "grad_norm": 1.3722502195381412, + "learning_rate": 1.2795605648344477e-07, + "loss": 0.23332493007183075, + "step": 3853 + }, + { + "epoch": 1.904485357716545, + "grad_norm": 1.2805992535782373, + "learning_rate": 1.2665646943408882e-07, + "loss": 0.19833901524543762, + "step": 3854 + }, + { + "epoch": 1.9049796120103792, + "grad_norm": 1.316108497317141, + "learning_rate": 1.2536347368780066e-07, + "loss": 0.23650333285331726, + "step": 3855 + }, + { + "epoch": 1.9054738663042134, + "grad_norm": 1.1749486485284195, + "learning_rate": 1.240770701077665e-07, + "loss": 0.20151859521865845, + "step": 3856 + }, + { + "epoch": 1.9059681205980477, + "grad_norm": 
1.4620220273758984, + "learning_rate": 1.2279725955277044e-07, + "loss": 0.32347559928894043, + "step": 3857 + }, + { + "epoch": 1.9064623748918819, + "grad_norm": 1.2726582104041342, + "learning_rate": 1.215240428771969e-07, + "loss": 0.25937923789024353, + "step": 3858 + }, + { + "epoch": 1.906956629185716, + "grad_norm": 1.6959402751075685, + "learning_rate": 1.2025742093102477e-07, + "loss": 0.2648822069168091, + "step": 3859 + }, + { + "epoch": 1.9074508834795503, + "grad_norm": 1.4639245582336404, + "learning_rate": 1.1899739455983327e-07, + "loss": 0.27612054347991943, + "step": 3860 + }, + { + "epoch": 1.9079451377733845, + "grad_norm": 1.32342317481008, + "learning_rate": 1.1774396460480064e-07, + "loss": 0.2204264998435974, + "step": 3861 + }, + { + "epoch": 1.9084393920672187, + "grad_norm": 1.4448526349141402, + "learning_rate": 1.164971319026964e-07, + "loss": 0.2719968557357788, + "step": 3862 + }, + { + "epoch": 1.908933646361053, + "grad_norm": 1.3288093626980793, + "learning_rate": 1.1525689728588807e-07, + "loss": 0.2308243364095688, + "step": 3863 + }, + { + "epoch": 1.909427900654887, + "grad_norm": 1.405242953564276, + "learning_rate": 1.1402326158234e-07, + "loss": 0.23281638324260712, + "step": 3864 + }, + { + "epoch": 1.909922154948721, + "grad_norm": 1.553800687505842, + "learning_rate": 1.127962256156101e-07, + "loss": 0.26273444294929504, + "step": 3865 + }, + { + "epoch": 1.9104164092425553, + "grad_norm": 1.3311046226223713, + "learning_rate": 1.1157579020484755e-07, + "loss": 0.26783496141433716, + "step": 3866 + }, + { + "epoch": 1.9109106635363895, + "grad_norm": 1.4482920311066827, + "learning_rate": 1.1036195616480061e-07, + "loss": 0.2575075626373291, + "step": 3867 + }, + { + "epoch": 1.9114049178302237, + "grad_norm": 1.3313207733281058, + "learning_rate": 1.0915472430580443e-07, + "loss": 0.24802085757255554, + "step": 3868 + }, + { + "epoch": 1.9118991721240577, + "grad_norm": 1.230518560175702, + "learning_rate": 
1.0795409543379099e-07, + "loss": 0.22017821669578552, + "step": 3869 + }, + { + "epoch": 1.912393426417892, + "grad_norm": 1.3804831257002024, + "learning_rate": 1.0676007035028579e-07, + "loss": 0.2525743246078491, + "step": 3870 + }, + { + "epoch": 1.912887680711726, + "grad_norm": 1.5674388988470875, + "learning_rate": 1.05572649852399e-07, + "loss": 0.26704782247543335, + "step": 3871 + }, + { + "epoch": 1.9133819350055603, + "grad_norm": 3.430480948746706, + "learning_rate": 1.0439183473283654e-07, + "loss": 0.25393134355545044, + "step": 3872 + }, + { + "epoch": 1.9138761892993945, + "grad_norm": 1.4465108879454651, + "learning_rate": 1.0321762577989448e-07, + "loss": 0.27266988158226013, + "step": 3873 + }, + { + "epoch": 1.9143704435932287, + "grad_norm": 1.366912603525092, + "learning_rate": 1.0205002377745799e-07, + "loss": 0.2694425582885742, + "step": 3874 + }, + { + "epoch": 1.914864697887063, + "grad_norm": 1.394500016346508, + "learning_rate": 1.0088902950500023e-07, + "loss": 0.28820598125457764, + "step": 3875 + }, + { + "epoch": 1.9153589521808971, + "grad_norm": 1.3050023577266547, + "learning_rate": 9.973464373758679e-08, + "loss": 0.2194051444530487, + "step": 3876 + }, + { + "epoch": 1.9158532064747313, + "grad_norm": 1.3831603392475145, + "learning_rate": 9.858686724586675e-08, + "loss": 0.25639402866363525, + "step": 3877 + }, + { + "epoch": 1.9163474607685655, + "grad_norm": 1.2744346736321277, + "learning_rate": 9.744570079608051e-08, + "loss": 0.23420584201812744, + "step": 3878 + }, + { + "epoch": 1.9168417150623998, + "grad_norm": 1.38639151316596, + "learning_rate": 9.631114515005425e-08, + "loss": 0.2514578700065613, + "step": 3879 + }, + { + "epoch": 1.9173359693562337, + "grad_norm": 1.296540814966686, + "learning_rate": 9.518320106520096e-08, + "loss": 0.2223532646894455, + "step": 3880 + }, + { + "epoch": 1.917830223650068, + "grad_norm": 1.367450022954602, + "learning_rate": 9.406186929451943e-08, + "loss": 0.21725934743881226, 
+ "step": 3881 + }, + { + "epoch": 1.9183244779439022, + "grad_norm": 1.2939049219304557, + "learning_rate": 9.294715058659531e-08, + "loss": 0.2081519365310669, + "step": 3882 + }, + { + "epoch": 1.9188187322377364, + "grad_norm": 1.4148048553245687, + "learning_rate": 9.183904568559998e-08, + "loss": 0.23683780431747437, + "step": 3883 + }, + { + "epoch": 1.9193129865315703, + "grad_norm": 1.3217345576155297, + "learning_rate": 9.073755533128725e-08, + "loss": 0.26095467805862427, + "step": 3884 + }, + { + "epoch": 1.9198072408254045, + "grad_norm": 1.253461281568054, + "learning_rate": 8.964268025899558e-08, + "loss": 0.24427568912506104, + "step": 3885 + }, + { + "epoch": 1.9203014951192388, + "grad_norm": 1.3603609343742546, + "learning_rate": 8.855442119964919e-08, + "loss": 0.23549365997314453, + "step": 3886 + }, + { + "epoch": 1.920795749413073, + "grad_norm": 1.4769071310965274, + "learning_rate": 8.74727788797547e-08, + "loss": 0.2645740807056427, + "step": 3887 + }, + { + "epoch": 1.9212900037069072, + "grad_norm": 1.3315198325383535, + "learning_rate": 8.639775402139894e-08, + "loss": 0.22890612483024597, + "step": 3888 + }, + { + "epoch": 1.9217842580007414, + "grad_norm": 1.4439303401955232, + "learning_rate": 8.532934734225451e-08, + "loss": 0.23417067527770996, + "step": 3889 + }, + { + "epoch": 1.9222785122945756, + "grad_norm": 1.3482339584478593, + "learning_rate": 8.42675595555753e-08, + "loss": 0.26125872135162354, + "step": 3890 + }, + { + "epoch": 1.9227727665884098, + "grad_norm": 1.4420298418522868, + "learning_rate": 8.321239137019433e-08, + "loss": 0.26559343934059143, + "step": 3891 + }, + { + "epoch": 1.923267020882244, + "grad_norm": 1.188066329993037, + "learning_rate": 8.216384349052809e-08, + "loss": 0.2033136785030365, + "step": 3892 + }, + { + "epoch": 1.9237612751760782, + "grad_norm": 1.975689815636208, + "learning_rate": 8.112191661656999e-08, + "loss": 0.2750868797302246, + "step": 3893 + }, + { + "epoch": 1.9242555294699124, 
+ "grad_norm": 1.366292176712638, + "learning_rate": 8.008661144389807e-08, + "loss": 0.2082993984222412, + "step": 3894 + }, + { + "epoch": 1.9247497837637466, + "grad_norm": 1.4608755297303442, + "learning_rate": 7.905792866366501e-08, + "loss": 0.2495439350605011, + "step": 3895 + }, + { + "epoch": 1.9252440380575806, + "grad_norm": 1.4141233844295813, + "learning_rate": 7.803586896260707e-08, + "loss": 0.25609591603279114, + "step": 3896 + }, + { + "epoch": 1.9257382923514148, + "grad_norm": 1.5334004898395663, + "learning_rate": 7.702043302303397e-08, + "loss": 0.25372135639190674, + "step": 3897 + }, + { + "epoch": 1.926232546645249, + "grad_norm": 1.3368221554281705, + "learning_rate": 7.601162152283904e-08, + "loss": 0.21882784366607666, + "step": 3898 + }, + { + "epoch": 1.926726800939083, + "grad_norm": 1.5284992426615736, + "learning_rate": 7.500943513548797e-08, + "loss": 0.24513296782970428, + "step": 3899 + }, + { + "epoch": 1.9272210552329172, + "grad_norm": 1.3036631509681367, + "learning_rate": 7.401387453002673e-08, + "loss": 0.23508042097091675, + "step": 3900 + }, + { + "epoch": 1.9277153095267514, + "grad_norm": 1.2751462486235168, + "learning_rate": 7.30249403710792e-08, + "loss": 0.2288282811641693, + "step": 3901 + }, + { + "epoch": 1.9282095638205856, + "grad_norm": 1.4342484579443016, + "learning_rate": 7.204263331884175e-08, + "loss": 0.24606133997440338, + "step": 3902 + }, + { + "epoch": 1.9287038181144198, + "grad_norm": 1.3623815600739415, + "learning_rate": 7.10669540290887e-08, + "loss": 0.2710507810115814, + "step": 3903 + }, + { + "epoch": 1.929198072408254, + "grad_norm": 1.3748292603956795, + "learning_rate": 7.009790315317122e-08, + "loss": 0.27333927154541016, + "step": 3904 + }, + { + "epoch": 1.9296923267020882, + "grad_norm": 1.3028025790213729, + "learning_rate": 6.913548133801074e-08, + "loss": 0.27518531680107117, + "step": 3905 + }, + { + "epoch": 1.9301865809959224, + "grad_norm": 1.3661226136758882, + "learning_rate": 
6.817968922610884e-08, + "loss": 0.24289458990097046, + "step": 3906 + }, + { + "epoch": 1.9306808352897566, + "grad_norm": 1.3726485965253954, + "learning_rate": 6.723052745553848e-08, + "loss": 0.225175678730011, + "step": 3907 + }, + { + "epoch": 1.9311750895835909, + "grad_norm": 1.4678815751521954, + "learning_rate": 6.628799665994612e-08, + "loss": 0.2592085599899292, + "step": 3908 + }, + { + "epoch": 1.931669343877425, + "grad_norm": 1.5719300045981148, + "learning_rate": 6.535209746855064e-08, + "loss": 0.2649756968021393, + "step": 3909 + }, + { + "epoch": 1.9321635981712593, + "grad_norm": 1.3380899824561678, + "learning_rate": 6.442283050614673e-08, + "loss": 0.2318311631679535, + "step": 3910 + }, + { + "epoch": 1.9326578524650933, + "grad_norm": 1.6231265342953554, + "learning_rate": 6.350019639309923e-08, + "loss": 0.252924382686615, + "step": 3911 + }, + { + "epoch": 1.9331521067589275, + "grad_norm": 1.1670510769577984, + "learning_rate": 6.258419574534547e-08, + "loss": 0.1903652548789978, + "step": 3912 + }, + { + "epoch": 1.9336463610527617, + "grad_norm": 1.270678601269557, + "learning_rate": 6.167482917439404e-08, + "loss": 0.22795221209526062, + "step": 3913 + }, + { + "epoch": 1.9341406153465959, + "grad_norm": 1.5197318429157889, + "learning_rate": 6.077209728732492e-08, + "loss": 0.26521584391593933, + "step": 3914 + }, + { + "epoch": 1.9346348696404299, + "grad_norm": 1.272486350308544, + "learning_rate": 5.987600068679045e-08, + "loss": 0.22152049839496613, + "step": 3915 + }, + { + "epoch": 1.935129123934264, + "grad_norm": 1.2727416096160045, + "learning_rate": 5.898653997100989e-08, + "loss": 0.22663083672523499, + "step": 3916 + }, + { + "epoch": 1.9356233782280983, + "grad_norm": 1.3553153320714941, + "learning_rate": 5.8103715733776047e-08, + "loss": 0.23720389604568481, + "step": 3917 + }, + { + "epoch": 1.9361176325219325, + "grad_norm": 1.4063431471110097, + "learning_rate": 5.722752856444858e-08, + "loss": 0.24053935706615448, 
+ "step": 3918 + }, + { + "epoch": 1.9366118868157667, + "grad_norm": 1.3614412415474415, + "learning_rate": 5.635797904795848e-08, + "loss": 0.26565641164779663, + "step": 3919 + }, + { + "epoch": 1.9371061411096009, + "grad_norm": 1.4288462330405298, + "learning_rate": 5.5495067764804736e-08, + "loss": 0.27181264758110046, + "step": 3920 + }, + { + "epoch": 1.937600395403435, + "grad_norm": 1.3077579832623365, + "learning_rate": 5.46387952910532e-08, + "loss": 0.23340710997581482, + "step": 3921 + }, + { + "epoch": 1.9380946496972693, + "grad_norm": 1.2207580518535108, + "learning_rate": 5.378916219833996e-08, + "loss": 0.19458985328674316, + "step": 3922 + }, + { + "epoch": 1.9385889039911035, + "grad_norm": 1.4167428327318625, + "learning_rate": 5.2946169053869066e-08, + "loss": 0.22900202870368958, + "step": 3923 + }, + { + "epoch": 1.9390831582849377, + "grad_norm": 1.528940034628332, + "learning_rate": 5.210981642040924e-08, + "loss": 0.30710160732269287, + "step": 3924 + }, + { + "epoch": 1.939577412578772, + "grad_norm": 1.1725653667546314, + "learning_rate": 5.12801048562972e-08, + "loss": 0.1754809319972992, + "step": 3925 + }, + { + "epoch": 1.940071666872606, + "grad_norm": 1.4924045607844934, + "learning_rate": 5.045703491543763e-08, + "loss": 0.28787121176719666, + "step": 3926 + }, + { + "epoch": 1.94056592116644, + "grad_norm": 1.2741088738360473, + "learning_rate": 4.96406071472999e-08, + "loss": 0.2239963263273239, + "step": 3927 + }, + { + "epoch": 1.9410601754602743, + "grad_norm": 1.3932338575101701, + "learning_rate": 4.883082209692025e-08, + "loss": 0.2121300995349884, + "step": 3928 + }, + { + "epoch": 1.9415544297541085, + "grad_norm": 1.4253326667240858, + "learning_rate": 4.802768030489735e-08, + "loss": 0.23445773124694824, + "step": 3929 + }, + { + "epoch": 1.9420486840479425, + "grad_norm": 1.3518230097115338, + "learning_rate": 4.7231182307400095e-08, + "loss": 0.2369021326303482, + "step": 3930 + }, + { + "epoch": 
1.9425429383417767, + "grad_norm": 1.4790931523959723, + "learning_rate": 4.644132863615758e-08, + "loss": 0.2764047086238861, + "step": 3931 + }, + { + "epoch": 1.943037192635611, + "grad_norm": 1.3688921537083945, + "learning_rate": 4.565811981846468e-08, + "loss": 0.26021280884742737, + "step": 3932 + }, + { + "epoch": 1.9435314469294451, + "grad_norm": 1.4010339478163996, + "learning_rate": 4.488155637718095e-08, + "loss": 0.26012706756591797, + "step": 3933 + }, + { + "epoch": 1.9440257012232793, + "grad_norm": 1.4174814798438116, + "learning_rate": 4.4111638830729444e-08, + "loss": 0.22092604637145996, + "step": 3934 + }, + { + "epoch": 1.9445199555171135, + "grad_norm": 1.3803129729570953, + "learning_rate": 4.334836769309347e-08, + "loss": 0.24200648069381714, + "step": 3935 + }, + { + "epoch": 1.9450142098109477, + "grad_norm": 1.4185191605274636, + "learning_rate": 4.2591743473826554e-08, + "loss": 0.2545608580112457, + "step": 3936 + }, + { + "epoch": 1.945508464104782, + "grad_norm": 1.4140513212071641, + "learning_rate": 4.1841766678036854e-08, + "loss": 0.24908477067947388, + "step": 3937 + }, + { + "epoch": 1.9460027183986162, + "grad_norm": 1.3236421908105307, + "learning_rate": 4.109843780639833e-08, + "loss": 0.23568233847618103, + "step": 3938 + }, + { + "epoch": 1.9464969726924504, + "grad_norm": 1.4613354363975228, + "learning_rate": 4.0361757355147355e-08, + "loss": 0.2230791449546814, + "step": 3939 + }, + { + "epoch": 1.9469912269862846, + "grad_norm": 1.4124924138900457, + "learning_rate": 3.963172581608166e-08, + "loss": 0.2541523277759552, + "step": 3940 + }, + { + "epoch": 1.9474854812801188, + "grad_norm": 1.320376312149322, + "learning_rate": 3.8908343676559156e-08, + "loss": 0.2466837763786316, + "step": 3941 + }, + { + "epoch": 1.9479797355739528, + "grad_norm": 1.434845915637092, + "learning_rate": 3.819161141950134e-08, + "loss": 0.2700938880443573, + "step": 3942 + }, + { + "epoch": 1.948473989867787, + "grad_norm": 
1.2847188951445323, + "learning_rate": 3.7481529523384355e-08, + "loss": 0.2353779673576355, + "step": 3943 + }, + { + "epoch": 1.9489682441616212, + "grad_norm": 1.4586624394757335, + "learning_rate": 3.677809846225344e-08, + "loss": 0.25708913803100586, + "step": 3944 + }, + { + "epoch": 1.9494624984554554, + "grad_norm": 1.3816118361393621, + "learning_rate": 3.6081318705705195e-08, + "loss": 0.26113903522491455, + "step": 3945 + }, + { + "epoch": 1.9499567527492894, + "grad_norm": 1.353452761923649, + "learning_rate": 3.539119071890307e-08, + "loss": 0.2561355531215668, + "step": 3946 + }, + { + "epoch": 1.9504510070431236, + "grad_norm": 1.3679281740557483, + "learning_rate": 3.470771496256409e-08, + "loss": 0.24893885850906372, + "step": 3947 + }, + { + "epoch": 1.9509452613369578, + "grad_norm": 1.3725754281016815, + "learning_rate": 3.403089189296771e-08, + "loss": 0.25399699807167053, + "step": 3948 + }, + { + "epoch": 1.951439515630792, + "grad_norm": 1.4355008801200986, + "learning_rate": 3.3360721961952505e-08, + "loss": 0.2820609509944916, + "step": 3949 + }, + { + "epoch": 1.9519337699246262, + "grad_norm": 1.222474026880474, + "learning_rate": 3.269720561691281e-08, + "loss": 0.22128066420555115, + "step": 3950 + }, + { + "epoch": 1.9524280242184604, + "grad_norm": 1.2322544942302993, + "learning_rate": 3.204034330080319e-08, + "loss": 0.2132534235715866, + "step": 3951 + }, + { + "epoch": 1.9529222785122946, + "grad_norm": 1.3290638165306805, + "learning_rate": 3.1390135452135095e-08, + "loss": 0.2308463454246521, + "step": 3952 + }, + { + "epoch": 1.9534165328061288, + "grad_norm": 1.443895071093895, + "learning_rate": 3.074658250497908e-08, + "loss": 0.2756718397140503, + "step": 3953 + }, + { + "epoch": 1.953910787099963, + "grad_norm": 1.404895730578394, + "learning_rate": 3.010968488896149e-08, + "loss": 0.24619412422180176, + "step": 3954 + }, + { + "epoch": 1.9544050413937972, + "grad_norm": 1.3236097287021305, + "learning_rate": 
2.9479443029265532e-08, + "loss": 0.2164454162120819, + "step": 3955 + }, + { + "epoch": 1.9548992956876314, + "grad_norm": 1.4498814795200483, + "learning_rate": 2.8855857346632432e-08, + "loss": 0.2778991460800171, + "step": 3956 + }, + { + "epoch": 1.9553935499814654, + "grad_norm": 1.3781229461817452, + "learning_rate": 2.8238928257359188e-08, + "loss": 0.22639301419258118, + "step": 3957 + }, + { + "epoch": 1.9558878042752996, + "grad_norm": 1.3946690868287814, + "learning_rate": 2.7628656173297463e-08, + "loss": 0.2367630898952484, + "step": 3958 + }, + { + "epoch": 1.9563820585691338, + "grad_norm": 1.233715623675162, + "learning_rate": 2.702504150185692e-08, + "loss": 0.2400333285331726, + "step": 3959 + }, + { + "epoch": 1.956876312862968, + "grad_norm": 1.5197718598123784, + "learning_rate": 2.6428084646001884e-08, + "loss": 0.2384340763092041, + "step": 3960 + }, + { + "epoch": 1.957370567156802, + "grad_norm": 1.2187445449938668, + "learning_rate": 2.5837786004253572e-08, + "loss": 0.20191673934459686, + "step": 3961 + }, + { + "epoch": 1.9578648214506362, + "grad_norm": 1.3530782559852856, + "learning_rate": 2.525414597068565e-08, + "loss": 0.24700434505939484, + "step": 3962 + }, + { + "epoch": 1.9583590757444704, + "grad_norm": 1.2586448244620927, + "learning_rate": 2.4677164934928665e-08, + "loss": 0.20032359659671783, + "step": 3963 + }, + { + "epoch": 1.9588533300383046, + "grad_norm": 1.5212375132060378, + "learning_rate": 2.4106843282165615e-08, + "loss": 0.280154287815094, + "step": 3964 + }, + { + "epoch": 1.9593475843321388, + "grad_norm": 1.46590896106962, + "learning_rate": 2.3543181393135274e-08, + "loss": 0.25518566370010376, + "step": 3965 + }, + { + "epoch": 1.959841838625973, + "grad_norm": 1.2792917112791735, + "learning_rate": 2.298617964413108e-08, + "loss": 0.2246837019920349, + "step": 3966 + }, + { + "epoch": 1.9603360929198073, + "grad_norm": 1.3954375167289552, + "learning_rate": 2.2435838407000034e-08, + "loss": 
0.23355916142463684, + "step": 3967 + }, + { + "epoch": 1.9608303472136415, + "grad_norm": 1.3615561015896285, + "learning_rate": 2.1892158049140467e-08, + "loss": 0.2449415922164917, + "step": 3968 + }, + { + "epoch": 1.9613246015074757, + "grad_norm": 1.279518283780108, + "learning_rate": 2.1355138933507602e-08, + "loss": 0.2269652783870697, + "step": 3969 + }, + { + "epoch": 1.9618188558013099, + "grad_norm": 1.4090731883758925, + "learning_rate": 2.0824781418605776e-08, + "loss": 0.26923638582229614, + "step": 3970 + }, + { + "epoch": 1.962313110095144, + "grad_norm": 1.3838329777907195, + "learning_rate": 2.0301085858493996e-08, + "loss": 0.2631189823150635, + "step": 3971 + }, + { + "epoch": 1.9628073643889783, + "grad_norm": 1.272147209216066, + "learning_rate": 1.978405260278593e-08, + "loss": 0.23281526565551758, + "step": 3972 + }, + { + "epoch": 1.9633016186828123, + "grad_norm": 1.4504818525258278, + "learning_rate": 1.9273681996644365e-08, + "loss": 0.26399385929107666, + "step": 3973 + }, + { + "epoch": 1.9637958729766465, + "grad_norm": 1.42867652212037, + "learning_rate": 1.876997438078454e-08, + "loss": 0.2641673684120178, + "step": 3974 + }, + { + "epoch": 1.9642901272704807, + "grad_norm": 1.2944638856965318, + "learning_rate": 1.8272930091476347e-08, + "loss": 0.22440402209758759, + "step": 3975 + }, + { + "epoch": 1.9647843815643147, + "grad_norm": 1.3585170311291963, + "learning_rate": 1.778254946053881e-08, + "loss": 0.2552195191383362, + "step": 3976 + }, + { + "epoch": 1.9652786358581489, + "grad_norm": 1.3475063805104281, + "learning_rate": 1.729883281534117e-08, + "loss": 0.24455100297927856, + "step": 3977 + }, + { + "epoch": 1.965772890151983, + "grad_norm": 1.4925946223112605, + "learning_rate": 1.6821780478808448e-08, + "loss": 0.2324603945016861, + "step": 3978 + }, + { + "epoch": 1.9662671444458173, + "grad_norm": 1.295320797137711, + "learning_rate": 1.6351392769412556e-08, + "loss": 0.25488242506980896, + "step": 3979 + }, + { + 
"epoch": 1.9667613987396515, + "grad_norm": 1.2382372998222446, + "learning_rate": 1.5887670001177856e-08, + "loss": 0.23511120676994324, + "step": 3980 + }, + { + "epoch": 1.9672556530334857, + "grad_norm": 1.474081111410746, + "learning_rate": 1.5430612483680052e-08, + "loss": 0.2683457136154175, + "step": 3981 + }, + { + "epoch": 1.96774990732732, + "grad_norm": 1.3558148882952648, + "learning_rate": 1.4980220522041734e-08, + "loss": 0.26627787947654724, + "step": 3982 + }, + { + "epoch": 1.9682441616211541, + "grad_norm": 1.3779286197554192, + "learning_rate": 1.4536494416940162e-08, + "loss": 0.22931841015815735, + "step": 3983 + }, + { + "epoch": 1.9687384159149883, + "grad_norm": 1.3860885624616435, + "learning_rate": 1.4099434464600603e-08, + "loss": 0.22918352484703064, + "step": 3984 + }, + { + "epoch": 1.9692326702088225, + "grad_norm": 1.498852903518302, + "learning_rate": 1.3669040956797442e-08, + "loss": 0.2542854845523834, + "step": 3985 + }, + { + "epoch": 1.9697269245026567, + "grad_norm": 1.278204707841908, + "learning_rate": 1.3245314180854175e-08, + "loss": 0.21581681072711945, + "step": 3986 + }, + { + "epoch": 1.970221178796491, + "grad_norm": 1.4943144749429917, + "learning_rate": 1.2828254419646746e-08, + "loss": 0.2708613872528076, + "step": 3987 + }, + { + "epoch": 1.970715433090325, + "grad_norm": 1.3377756042264306, + "learning_rate": 1.2417861951597998e-08, + "loss": 0.25348716974258423, + "step": 3988 + }, + { + "epoch": 1.9712096873841591, + "grad_norm": 1.396109244896111, + "learning_rate": 1.2014137050677665e-08, + "loss": 0.24585089087486267, + "step": 3989 + }, + { + "epoch": 1.9717039416779933, + "grad_norm": 1.3367696007925745, + "learning_rate": 1.1617079986410152e-08, + "loss": 0.26362112164497375, + "step": 3990 + }, + { + "epoch": 1.9721981959718276, + "grad_norm": 1.815729582105598, + "learning_rate": 1.1226691023862312e-08, + "loss": 0.23288659751415253, + "step": 3991 + }, + { + "epoch": 1.9726924502656615, + "grad_norm": 
1.2305688182670602, + "learning_rate": 1.0842970423654563e-08, + "loss": 0.21604478359222412, + "step": 3992 + }, + { + "epoch": 1.9731867045594957, + "grad_norm": 1.353706093653017, + "learning_rate": 1.0465918441950885e-08, + "loss": 0.21149985492229462, + "step": 3993 + }, + { + "epoch": 1.97368095885333, + "grad_norm": 1.40480632228099, + "learning_rate": 1.0095535330467698e-08, + "loss": 0.26392504572868347, + "step": 3994 + }, + { + "epoch": 1.9741752131471642, + "grad_norm": 1.246030064073758, + "learning_rate": 9.731821336466107e-09, + "loss": 0.22993823885917664, + "step": 3995 + }, + { + "epoch": 1.9746694674409984, + "grad_norm": 1.247780565740116, + "learning_rate": 9.374776702757438e-09, + "loss": 0.2207789570093155, + "step": 3996 + }, + { + "epoch": 1.9751637217348326, + "grad_norm": 1.3761642168404886, + "learning_rate": 9.024401667698802e-09, + "loss": 0.27149268984794617, + "step": 3997 + }, + { + "epoch": 1.9756579760286668, + "grad_norm": 1.4326020240148696, + "learning_rate": 8.680696465196425e-09, + "loss": 0.269406795501709, + "step": 3998 + }, + { + "epoch": 1.976152230322501, + "grad_norm": 1.4639077922370294, + "learning_rate": 8.343661324703434e-09, + "loss": 0.25354713201522827, + "step": 3999 + }, + { + "epoch": 1.9766464846163352, + "grad_norm": 1.3417205540337154, + "learning_rate": 8.013296471217624e-09, + "loss": 0.22957751154899597, + "step": 4000 + }, + { + "epoch": 1.9771407389101694, + "grad_norm": 1.4456474308400453, + "learning_rate": 7.68960212528702e-09, + "loss": 0.25355982780456543, + "step": 4001 + }, + { + "epoch": 1.9776349932040036, + "grad_norm": 1.3644174652815564, + "learning_rate": 7.372578503005434e-09, + "loss": 0.2453315556049347, + "step": 4002 + }, + { + "epoch": 1.9781292474978376, + "grad_norm": 1.4020681279841292, + "learning_rate": 7.062225816013568e-09, + "loss": 0.2274405062198639, + "step": 4003 + }, + { + "epoch": 1.9786235017916718, + "grad_norm": 1.229857953320014, + "learning_rate": 
6.7585442714979136e-09, + "loss": 0.22195965051651, + "step": 4004 + }, + { + "epoch": 1.979117756085506, + "grad_norm": 1.266569965950015, + "learning_rate": 6.461534072191855e-09, + "loss": 0.18664966523647308, + "step": 4005 + }, + { + "epoch": 1.9796120103793402, + "grad_norm": 1.3364190229696613, + "learning_rate": 6.171195416375675e-09, + "loss": 0.23385149240493774, + "step": 4006 + }, + { + "epoch": 1.9801062646731742, + "grad_norm": 1.4261084517387066, + "learning_rate": 5.887528497874328e-09, + "loss": 0.3141595721244812, + "step": 4007 + }, + { + "epoch": 1.9806005189670084, + "grad_norm": 1.1455773069888675, + "learning_rate": 5.610533506060778e-09, + "loss": 0.22392721474170685, + "step": 4008 + }, + { + "epoch": 1.9810947732608426, + "grad_norm": 1.3684844463884664, + "learning_rate": 5.34021062585377e-09, + "loss": 0.24451547861099243, + "step": 4009 + }, + { + "epoch": 1.9815890275546768, + "grad_norm": 1.4628440300051093, + "learning_rate": 5.076560037714506e-09, + "loss": 0.2916273772716522, + "step": 4010 + }, + { + "epoch": 1.982083281848511, + "grad_norm": 1.4921825705668792, + "learning_rate": 4.819581917654414e-09, + "loss": 0.25200486183166504, + "step": 4011 + }, + { + "epoch": 1.9825775361423452, + "grad_norm": 1.3319991237500093, + "learning_rate": 4.569276437227377e-09, + "loss": 0.24660873413085938, + "step": 4012 + }, + { + "epoch": 1.9830717904361794, + "grad_norm": 1.286303231792404, + "learning_rate": 4.325643763534171e-09, + "loss": 0.22276514768600464, + "step": 4013 + }, + { + "epoch": 1.9835660447300136, + "grad_norm": 1.556433924523847, + "learning_rate": 4.088684059220249e-09, + "loss": 0.28938305377960205, + "step": 4014 + }, + { + "epoch": 1.9840602990238478, + "grad_norm": 1.3733276400200998, + "learning_rate": 3.85839748247685e-09, + "loss": 0.24640555679798126, + "step": 4015 + }, + { + "epoch": 1.984554553317682, + "grad_norm": 1.3418256960556196, + "learning_rate": 3.6347841870398858e-09, + "loss": 0.24476927518844604, 
+ "step": 4016 + }, + { + "epoch": 1.9850488076115163, + "grad_norm": 1.4034327960934876, + "learning_rate": 3.417844322189945e-09, + "loss": 0.22534328699111938, + "step": 4017 + }, + { + "epoch": 1.9855430619053505, + "grad_norm": 1.4375031314885967, + "learning_rate": 3.2075780327534e-09, + "loss": 0.25029847025871277, + "step": 4018 + }, + { + "epoch": 1.9860373161991844, + "grad_norm": 1.376115670772505, + "learning_rate": 3.0039854591012994e-09, + "loss": 0.25584423542022705, + "step": 4019 + }, + { + "epoch": 1.9865315704930187, + "grad_norm": 1.3971629958782856, + "learning_rate": 2.8070667371493663e-09, + "loss": 0.26148709654808044, + "step": 4020 + }, + { + "epoch": 1.9870258247868529, + "grad_norm": 1.3375096203856904, + "learning_rate": 2.6168219983557786e-09, + "loss": 0.2510269284248352, + "step": 4021 + }, + { + "epoch": 1.987520079080687, + "grad_norm": 1.2826838094230344, + "learning_rate": 2.433251369727829e-09, + "loss": 0.24044418334960938, + "step": 4022 + }, + { + "epoch": 1.988014333374521, + "grad_norm": 1.3500084280255722, + "learning_rate": 2.256354973813046e-09, + "loss": 0.25793880224227905, + "step": 4023 + }, + { + "epoch": 1.9885085876683553, + "grad_norm": 1.4052663822447213, + "learning_rate": 2.086132928705853e-09, + "loss": 0.23637095093727112, + "step": 4024 + }, + { + "epoch": 1.9890028419621895, + "grad_norm": 1.4859565437270463, + "learning_rate": 1.9225853480431267e-09, + "loss": 0.29570624232292175, + "step": 4025 + }, + { + "epoch": 1.9894970962560237, + "grad_norm": 1.423587654508064, + "learning_rate": 1.7657123410075306e-09, + "loss": 0.2697899341583252, + "step": 4026 + }, + { + "epoch": 1.9899913505498579, + "grad_norm": 1.6022210661391376, + "learning_rate": 1.615514012324182e-09, + "loss": 0.28011834621429443, + "step": 4027 + }, + { + "epoch": 1.990485604843692, + "grad_norm": 1.4192356954254337, + "learning_rate": 1.4719904622650939e-09, + "loss": 0.2770778238773346, + "step": 4028 + }, + { + "epoch": 
1.9909798591375263, + "grad_norm": 1.3392534385035628, + "learning_rate": 1.335141786642513e-09, + "loss": 0.2662060558795929, + "step": 4029 + }, + { + "epoch": 1.9914741134313605, + "grad_norm": 1.2456909926822828, + "learning_rate": 1.2049680768166928e-09, + "loss": 0.21188628673553467, + "step": 4030 + }, + { + "epoch": 1.9919683677251947, + "grad_norm": 1.4172757737685897, + "learning_rate": 1.0814694196892294e-09, + "loss": 0.2749127745628357, + "step": 4031 + }, + { + "epoch": 1.992462622019029, + "grad_norm": 1.378123997738594, + "learning_rate": 9.64645897704175e-10, + "loss": 0.2271629124879837, + "step": 4032 + }, + { + "epoch": 1.9929568763128631, + "grad_norm": 1.3952087957146098, + "learning_rate": 8.544975888535867e-10, + "loss": 0.2704155743122101, + "step": 4033 + }, + { + "epoch": 1.993451130606697, + "grad_norm": 1.2966104091933182, + "learning_rate": 7.51024566670866e-10, + "loss": 0.23073506355285645, + "step": 4034 + }, + { + "epoch": 1.9939453849005313, + "grad_norm": 1.3325329668608163, + "learning_rate": 6.542269002307589e-10, + "loss": 0.21597059071063995, + "step": 4035 + }, + { + "epoch": 1.9944396391943655, + "grad_norm": 1.3812576049514713, + "learning_rate": 5.641046541560169e-10, + "loss": 0.21368899941444397, + "step": 4036 + }, + { + "epoch": 1.9949338934881997, + "grad_norm": 1.4586669387690583, + "learning_rate": 4.806578886107361e-10, + "loss": 0.2594050168991089, + "step": 4037 + }, + { + "epoch": 1.9954281477820337, + "grad_norm": 1.3017520645594045, + "learning_rate": 4.03886659302577e-10, + "loss": 0.21630799770355225, + "step": 4038 + }, + { + "epoch": 1.995922402075868, + "grad_norm": 1.3597057268680945, + "learning_rate": 3.337910174827652e-10, + "loss": 0.25168266892433167, + "step": 4039 + }, + { + "epoch": 1.9964166563697021, + "grad_norm": 1.7213418381035832, + "learning_rate": 2.70371009946091e-10, + "loss": 0.27963966131210327, + "step": 4040 + }, + { + "epoch": 1.9969109106635363, + "grad_norm": 1.575376951155568, 
+ "learning_rate": 2.1362667903090938e-10, + "loss": 0.2196345329284668, + "step": 4041 + }, + { + "epoch": 1.9974051649573705, + "grad_norm": 1.3640521467532305, + "learning_rate": 1.6355806262025043e-10, + "loss": 0.22677919268608093, + "step": 4042 + }, + { + "epoch": 1.9978994192512047, + "grad_norm": 1.3667805176029666, + "learning_rate": 1.2016519413626804e-10, + "loss": 0.23044565320014954, + "step": 4043 + }, + { + "epoch": 1.998393673545039, + "grad_norm": 1.4131604362123111, + "learning_rate": 8.344810255023206e-11, + "loss": 0.2739316523075104, + "step": 4044 + }, + { + "epoch": 1.9988879278388731, + "grad_norm": 1.4085870126547175, + "learning_rate": 5.3406812372536196e-11, + "loss": 0.2610301673412323, + "step": 4045 + }, + { + "epoch": 1.9993821821327074, + "grad_norm": 1.2504694698952337, + "learning_rate": 3.004134365824918e-11, + "loss": 0.23652175068855286, + "step": 4046 + }, + { + "epoch": 1.9998764364265416, + "grad_norm": 1.2951296197807791, + "learning_rate": 1.3351712006004492e-11, + "loss": 0.23983967304229736, + "step": 4047 + }, + { + "epoch": 2.0, + "grad_norm": 2.5975889779248975, + "learning_rate": 3.337928559110637e-12, + "loss": 0.2640778720378876, + "step": 4048 + }, + { + "epoch": 2.0, + "step": 4048, + "total_flos": 3066106945339392.0, + "train_loss": 0.3191354194832708, + "train_runtime": 52842.7617, + "train_samples_per_second": 1.225, + "train_steps_per_second": 0.077 + } + ], + "logging_steps": 1, + "max_steps": 4048, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3066106945339392.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..dfe3e09693106b888d9a74120f900fc466890d4c --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0dfb10ba35de856be3ab9b2b044348b5752efc43fb83f0d6e71a782894a3001 +size 6968 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..bca8e815abed0cee18d68662ebffe5f5edfcb493 Binary files /dev/null and b/training_loss.png differ